Commit 18b3bfdc authored by Greg Wilson's avatar Greg Wilson
Browse files

Merge branch 'gh-pages' of into gh-pages

parents 269239bb 08f1537e
......@@ -7,6 +7,7 @@
<meta name="slug" content="{{site.github.project_title}}" />
<meta name="startdate" content="{{page.startdate}}" />
<meta name="enddate" content="{{page.enddate}}" />
<meta name="humandate" content="{{page.humandate}}" />
<meta name="country" content="{{page.country}}" />
<meta name="venue" content="{{page.venue}}" />
<meta name="address" content="{{page.address}}" />
......@@ -9,27 +9,104 @@ import sys
import os
import glob
import fnmatch
import re
import yaml
from optparse import OptionParser
from bs4 import BeautifulSoup
from lxml import etree
import dateutil.parser
# Default lesson configuration.
- has_title_in_head
- has_navbar
- has_title_in_body
- has_footer
- lesson_has_title_in_head
- lesson_has_navbar
- lesson_has_title_in_body
- lesson_has_footer
- has_prereq
- has_syllabus
- lesson_has_prereq
- lesson_has_syllabus
- has_objectives
- lesson_has_objectives
# Default workshop configuration.
- workshop_check_slug
- workshop_check_country
- workshop_check_language
- workshop_check_humandate
- workshop_check_humantime
- workshop_check_startdate
- workshop_check_enddate
- workshop_check_latitude_longitude
- workshop_check_instructors
- workshop_check_helpers
- workshop_check_contact
- workshop_check_eventbrite
- workshop_check_etherpad
# Regular expression patterns for workshops.
# EMAIL_PATTERN: one or more non-'@' characters, an '@', then more non-'@'
# characters that include a '.' followed by at least one further character.
EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
# HUMANTIME_PATTERN: a start time and an end time joined by '-' or 'to' with
# no spaces in between.  Either both times are 12-hour (hour 1-12, optional
# leading zero, lowercase am/pm suffix) or both are 24-hour (hour 0-23,
# optional leading zero); minutes are always two digits in the range 00-59.
HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
# URL_PATTERN: 'http://' or 'https://' followed by at least one character.
URL_PATTERN = r'https?://.+'
# Country and language codes. Note that codes mean different things: 'ar'
# is 'Arabic' as a language but 'Argentina' as a country.
'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
'ye', 'yt', 'za', 'zm', 'zw'
'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
'yo', 'za', 'zh', 'zu'
# Record all the rules.
RULES = {}
......@@ -38,6 +115,9 @@ def rule(fn):
return fn
# Accumulate error messages.
def main():
'''Main driver: check all files with all rules that apply.'''
......@@ -56,6 +136,8 @@ def main():
if args.verbose > 1:
print('...', rule, file=sys.stderr)
RULES[rule](filename, docs[filename])
for m in MESSAGES:
def parse_args():
......@@ -104,6 +186,8 @@ def read_config(args):
args.config = yaml.load(reader)
elif args.check_lesson:
args.config = yaml.load(LESSON_CONFIG)
elif args.check_workshop:
args.config = yaml.load(WORKSHOP_CONFIG)
assert False, 'Do not know what configuration to load'
......@@ -133,63 +217,229 @@ def read_all_docs(source_dir):
def has_footer(filename, doc):
def lesson_has_footer(filename, doc):
'''Document has footer element.'''
_check_1(filename, doc, 'footers', '//footer')
_check_one_element(filename, doc, 'footers', '//footer')
def has_navbar(filename, doc):
def lesson_has_navbar(filename, doc):
'''Document has header element.'''
_check_1(filename, doc, 'div navbar', '//div[@class="navbar-header"]')
_check_one_element(filename, doc, 'div navbar', '//div[@class="navbar-header"]')
def has_objectives(filename, doc):
def lesson_has_objectives(filename, doc):
'''Episode has objectives.'''
_check_1(filename, doc, 'objectives div', '//blockquote[@class="objectives"]')
_check_one_element(filename, doc, 'objectives div', '//blockquote[@class="objectives"]')
def has_prereq(filename, doc):
def lesson_has_prereq(filename, doc):
'''Index page has prerequisites block.'''
_check_1(filename, doc, 'prerequisites blockquote', '//blockquote[@class="prereq"]')
_check_one_element(filename, doc, 'prerequisites blockquote', '//blockquote[@class="prereq"]')
def has_syllabus(filename, doc):
def lesson_has_syllabus(filename, doc):
'''Index page has syllabus.'''
_check_1(filename, doc, 'syllabus', '//div[@class="syllabus"]')
_check_1(filename, doc, 'syllabus title', '//div[@class="syllabus"]/h2')
_check_1(filename, doc, 'syllabus table', '//div[@class="syllabus"]/table')
_check_one_element(filename, doc, 'syllabus', '//div[@class="syllabus"]')
_check_one_element(filename, doc, 'syllabus title', '//div[@class="syllabus"]/h2')
_check_one_element(filename, doc, 'syllabus table', '//div[@class="syllabus"]/table')
def has_title_in_head(filename, doc):
def lesson_has_title_in_head(filename, doc):
'''Document has a title in the head.'''
_check_1(filename, doc, 'title in head', '//head//title')
_check_one_element(filename, doc, 'title in head', '//head//title')
def has_title_in_body(filename, doc):
def lesson_has_title_in_body(filename, doc):
'''Document has a title in the body.'''
_check_1(filename, doc, 'title in body', '//body//h1[@class="maintitle"]')
_check_one_element(filename, doc, 'title in body', '//body//h1[@class="maintitle"]')
def workshop_check_slug(filename, doc):
    '''"slug" metadata must be present exactly once and match SLUG_PATTERN.'''
    slug = _check_meta(filename, doc, 'slug')
    _check_regexp(SLUG_PATTERN, slug, 'invalid slug')
def workshop_check_country(filename, doc):
    '''The "country" metadata value has to be one of the lowercase
    two-letter ISO-3166 codes held in ISO_COUNTRY.'''
    code = _check_meta(filename, doc, 'country')
    is_known = code in ISO_COUNTRY
    _check(is_known, 'Unknown country')
def workshop_check_language(filename, doc):
    '''The "language" metadata value has to be one of the lowercase
    two-letter ISO-639 codes held in ISO_LANGUAGE.'''
    code = _check_meta(filename, doc, 'language')
    is_known = code in ISO_LANGUAGE
    _check(is_known, 'Unknown language')
def workshop_check_humandate(filename, doc):
    '''"humandate" must be a human-readable date with a 3-letter month and
    4-digit year.  Examples include "Feb 18-20, 2025" and "Feb 18 and
    20, 2025".  It may be in languages other than English, but the
    month name should be kept short to aid formatting of the main
    Software Carpentry web site.

    Every problem found is reported through _check / _check_regexp;
    nothing is returned.'''
    humandate = _check_meta(filename, doc, 'humandate')
    if humandate is None:
        # No usable value came back from _check_meta, so there is nothing
        # to inspect (and "',' in None" would raise TypeError).
        return
    if not _check(',' in humandate, 'Require comma in human date'):
        return

    # Split on the LAST comma only: the year is the final field, and a day
    # list such as "Feb 18, 20, 2025" must not break the unpacking.
    month_dates, year = humandate.rsplit(',', 1)

    # The first three characters of month_dates are not empty...
    month = month_dates[:3]
    _check(' ' not in month, 'Cannot be spaces in month')
    # ...but the fourth character is ("February" is illegal).  Slicing
    # instead of indexing keeps very short values from raising IndexError.
    _check(month_dates[3:4] == ' ', 'Month names must be three letters long')

    # Year must contain only digits.  The space that normally follows the
    # comma is stripped first, and the pattern is anchored at the end so
    # trailing junk is rejected as well.
    _check_regexp(r'\d+$', year.strip(), 'Year must be only digits')
def workshop_check_humantime(filename, doc):
    '''"humantime" is the human-readable start and end time of the
    workshop, for example "09:00 - 16:00".  Spaces are ignored when the
    value is compared against HUMANTIME_PATTERN; a missing or empty value
    is not checked any further.'''
    humantime = _check_meta(filename, doc, 'humantime')
    if not humantime:
        return
    compact = humantime.replace(" ", "")
    _check_regexp(HUMANTIME_PATTERN, compact, 'Badly-formatted human time')
def workshop_check_startdate(filename, doc):
    '''"startdate" must be machine-readable start date for the workshop,
    and must be in YYYY-MM-DD format, e.g., "2015-07-01".

    A value that cannot be parsed is reported via _check.'''
    startdate = _check_meta(filename, doc, 'startdate')
    if startdate is None:
        # No usable value came back from _check_meta; handing None to the
        # parser would raise instead of reporting.
        return
    try:
        # Only parseability matters here; the parsed value is not used.
        # NOTE(review): dateutil accepts many layouts besides YYYY-MM-DD,
        # so this does not strictly enforce the documented format.
        dateutil.parser.parse(startdate)
    except (ValueError, OverflowError):
        _check(False, 'Badly-formatted start date')
def workshop_check_enddate(filename, doc):
    '''"enddate" must be machine-readable end date for the workshop,
    and must be in YYYY-MM-DD format, e.g., "2015-07-01".

    A value that cannot be parsed is reported via _check.'''
    enddate = _check_meta(filename, doc, 'enddate')
    if enddate is None:
        # No usable value came back from _check_meta; handing None to the
        # parser would raise instead of reporting.
        return
    try:
        # Only parseability matters here; the parsed value is not used.
        # NOTE(review): dateutil accepts many layouts besides YYYY-MM-DD,
        # so this does not strictly enforce the documented format.
        dateutil.parser.parse(enddate)
    except (ValueError, OverflowError):
        _check(False, 'Badly-formatted end date')
def workshop_check_latitude_longitude(filename, doc):
    '''"latlng" must be a valid latitude and longitude represented as two
    floating-point numbers separated by a comma.

    Latitude has to lie in [-90, 90] and longitude in [-180, 180].
    Problems are reported via _check.'''
    latlng = _check_meta(filename, doc, 'latlng')
    if latlng is None:
        # No usable value came back from _check_meta, so there is nothing
        # to split.
        return
    try:
        # ValueError covers both a non-numeric field and a wrong number of
        # comma-separated fields (the unpacking fails).
        lat, lng = (float(part) for part in latlng.split(','))
    except ValueError:
        _check(False, 'Unable to parse lat/long')
        return
    _check((-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0),
           'Invalid numeric values for latitude/longitude')
def workshop_check_instructors(filename, doc):
    '''"instructor" must be a non-empty comma-separated list of quoted names,
    e.g. ['First name', 'Second name', ...].  Do not use "TBD" or other
    placeholders.

    Not implemented yet: the function currently accepts every document.'''
    pass  # FIXME
def _check_1(filename, doc, rulename, xpath):
def workshop_check_helpers(filename, doc):
    '''"helper" must be a comma-separated list of quoted names, e.g.
    ['First name', 'Second name', ...].  Unlike the instructor list it may
    be empty.  Do not use "TBD" or other placeholders.

    Placeholder only: no validation is carried out so far.'''
    return None  # FIXME
def workshop_check_contact(filename, doc):
    '''"contact" must look like an email address (some text, an "@", more
    text) and must differ from the placeholder address stored in
    DEFAULT_CONTACT_EMAIL.'''
    address = _check_meta(filename, doc, 'contact')
    _check_regexp(EMAIL_PATTERN, address, 'Invalid contact email')
    is_customised = address != DEFAULT_CONTACT_EMAIL
    _check(is_customised, 'Cannot use default contact email')
def workshop_check_eventbrite(filename, doc):
    '''The "eventbrite" registration key has to match EVENTBRITE_PATTERN
    (nine or more digits, according to the error message).'''
    key = _check_meta(filename, doc, 'eventbrite')
    _check_regexp(EVENTBRITE_PATTERN, key, 'Eventbrite key must be 9 or more digits')
def workshop_check_etherpad(filename, doc):
    '''The "etherpad" metadata value has to match URL_PATTERN, i.e. be an
    http or https address.'''
    address = _check_meta(filename, doc, 'etherpad')
    _check_regexp(URL_PATTERN, address, 'Etherpad address must be a valid URL')
def _check_meta(filename, doc, metaname):
    '''Look up the <meta name="..."> tag called metaname in the document
    head and hand back its "content" attribute.  The result is whatever
    _check_one_element returns, which is None when the lookup fails.'''
    selector = '//html/head/meta[@name="{0}"]'.format(metaname)
    return _check_one_element(filename, doc, metaname, selector, attribute='content')
def _check_one_element(filename, doc, rulename, xpath, attribute=None):
    '''Check that xpath matches exactly one element of doc.

    A mismatch is recorded in MESSAGES (once; it is no longer printed here
    as well) and None is returned.  On a single match the value of
    `attribute` on that element is returned when an attribute name was
    given; if no attribute was requested, or the element lacks it, the
    result is None.'''
    matches = doc.xpath(xpath)
    if len(matches) != 1:
        MESSAGES.append('In {0}, checking {1}: expected 1 match, got {2}'.format(filename, rulename, len(matches)))
        return None
    if attribute is None:
        return None
    return matches[0].attrib.get(attribute, None)
def _check_regexp(pattern, value, message):
    '''Report `message` unless value matches pattern from its start.  A
    value of None is reported separately and is never handed to re.match.'''
    if _check(value is not None, message + ': value is None'):
        _check(re.match(pattern, value), message)
def _check(condition, message):
    '''Record error message if condition not met, returning condition for chaining.

    The message is appended to the module-level MESSAGES list.  The
    condition is returned unchanged whether or not it held, so callers can
    write "if _check(...):" or chain checks with "and".'''
    if not condition:
        MESSAGES.append(message)
    return condition
def _require(condition, message):
'''Fail if condition not met.'''
