diff --git a/_layouts/workshop_homepage.html b/_layouts/workshop_homepage.html
index ab3cf74a25adbfe9dd947118aeaedd6366db9855..95f4fde7b1365c0842a85434c540d0b79c0125c9 100644
--- a/_layouts/workshop_homepage.html
+++ b/_layouts/workshop_homepage.html
@@ -7,6 +7,7 @@
+
diff --git a/bin/validator b/bin/validator
index 2c5ad24abe4fcbc8b3291bd13edf3bd2f6922f66..b43af6bb07ff16f94f594e22905dd754d6e42773 100755
--- a/bin/validator
+++ b/bin/validator
@@ -9,27 +9,104 @@ import sys
import os
import glob
import fnmatch
+import re
import yaml
from optparse import OptionParser
from bs4 import BeautifulSoup
from lxml import etree
+import dateutil.parser
# Default lesson configuration.
LESSON_CONFIG = '''\
patterns:
'*.html':
- - has_title_in_head
- - has_navbar
- - has_title_in_body
- - has_footer
+ - lesson_has_title_in_head
+ - lesson_has_navbar
+ - lesson_has_title_in_body
+ - lesson_has_footer
index.html:
- - has_prereq
- - has_syllabus
+ - lesson_has_prereq
+ - lesson_has_syllabus
'*-*/index.html':
- - has_objectives
+ - lesson_has_objectives
'''
+# Default workshop configuration.
+WORKSHOP_CONFIG = '''\
+patterns:
+ 'index.html':
+ - workshop_check_slug
+ - workshop_check_country
+ - workshop_check_language
+ - workshop_check_humandate
+ - workshop_check_humantime
+ - workshop_check_startdate
+ - workshop_check_enddate
+ - workshop_check_latitude_longitude
+ - workshop_check_instructors
+ - workshop_check_helpers
+ - workshop_check_contact
+ - workshop_check_eventbrite
+ - workshop_check_etherpad
+'''
+
+
+# Regular expression patterns for workshops.
+SLUG_PATTERN = r'.+'
+EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
+HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
+EVENTBRITE_PATTERN = r'\d{9,10}'
+URL_PATTERN = r'https?://.+'
+
+DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org'
+
+# Country and language codes. Note that codes mean different things: 'ar'
+# is 'Arabic' as a language but 'Argentina' as a country.
+
+ISO_COUNTRY = [
+ 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
+ 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
+ 'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
+ 'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
+ 'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
+ 'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
+ 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
+ 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
+ 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
+ 'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
+ 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
+ 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
+ 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
+ 'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
+ 'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
+ 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
+ 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
+ 'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
+ 'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
+ 'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
+ 'ye', 'yt', 'za', 'zm', 'zw'
+]
+
+ISO_LANGUAGE = [
+ 'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
+ 'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
+ 'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
+ 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
+ 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
+ 'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
+ 'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
+ 'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
+ 'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
+ 'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
+ 'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
+ 'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
+ 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
+ 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
+ 'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
+ 'yo', 'za', 'zh', 'zu'
+]
+
# Record all the rules.
RULES = {}
@@ -38,6 +115,9 @@ def rule(fn):
return fn
+# Accumulate error messages.
+MESSAGES = []
+
def main():
'''Main driver: check all files with all rules that apply.'''
@@ -56,6 +136,8 @@ def main():
if args.verbose > 1:
print('...', rule, file=sys.stderr)
RULES[rule](filename, docs[filename])
+ for m in MESSAGES:
+ print(m)
def parse_args():
@@ -104,6 +186,8 @@ def read_config(args):
args.config = yaml.load(reader)
elif args.check_lesson:
args.config = yaml.load(LESSON_CONFIG)
+ elif args.check_workshop:
+ args.config = yaml.load(WORKSHOP_CONFIG)
else:
assert False, 'Do not know what configuration to load'
@@ -133,63 +217,229 @@ def read_all_docs(source_dir):
@rule
-def has_footer(filename, doc):
+def lesson_has_footer(filename, doc):
'''Document has footer element.'''
- _check_1(filename, doc, 'footers', '//footer')
+ _check_one_element(filename, doc, 'footers', '//footer')
@rule
-def has_navbar(filename, doc):
+def lesson_has_navbar(filename, doc):
'''Document has header element.'''
- _check_1(filename, doc, 'div navbar', '//div[@class="navbar-header"]')
+ _check_one_element(filename, doc, 'div navbar', '//div[@class="navbar-header"]')
@rule
-def has_objectives(filename, doc):
+def lesson_has_objectives(filename, doc):
'''Episode has objectives.'''
- _check_1(filename, doc, 'objectives div', '//blockquote[@class="objectives"]')
+ _check_one_element(filename, doc, 'objectives div', '//blockquote[@class="objectives"]')
@rule
-def has_prereq(filename, doc):
+def lesson_has_prereq(filename, doc):
'''Index page has prerequisites block.'''
- _check_1(filename, doc, 'prerequisites blockquote', '//blockquote[@class="prereq"]')
+ _check_one_element(filename, doc, 'prerequisites blockquote', '//blockquote[@class="prereq"]')
@rule
-def has_syllabus(filename, doc):
+def lesson_has_syllabus(filename, doc):
'''Index page has syllabus.'''
- _check_1(filename, doc, 'syllabus', '//div[@class="syllabus"]')
- _check_1(filename, doc, 'syllabus title', '//div[@class="syllabus"]/h2')
- _check_1(filename, doc, 'syllabus table', '//div[@class="syllabus"]/table')
+ _check_one_element(filename, doc, 'syllabus', '//div[@class="syllabus"]')
+ _check_one_element(filename, doc, 'syllabus title', '//div[@class="syllabus"]/h2')
+ _check_one_element(filename, doc, 'syllabus table', '//div[@class="syllabus"]/table')
@rule
-def has_title_in_head(filename, doc):
+def lesson_has_title_in_head(filename, doc):
'''Document has a title in the head.'''
- _check_1(filename, doc, 'title in head', '//head//title')
+ _check_one_element(filename, doc, 'title in head', '//head//title')
@rule
-def has_title_in_body(filename, doc):
+def lesson_has_title_in_body(filename, doc):
'''Document has a title in the body.'''
- _check_1(filename, doc, 'title in body', '//body//h1[@class="maintitle"]')
+ _check_one_element(filename, doc, 'title in body', '//body//h1[@class="maintitle"]')
+
+
+@rule
+def workshop_check_slug(filename, doc):
+ content = _check_meta(filename, doc, 'slug')
+ _check_regexp(SLUG_PATTERN, content, 'invalid slug')
+
+
+@rule
+def workshop_check_country(filename, doc):
+ '''"country" must be a lowercase ISO-3166 two-letter code.'''
+
+ country = _check_meta(filename, doc, 'country')
+ _check(country in ISO_COUNTRY, 'Unknown country')
+
+
+@rule
+def workshop_check_language(filename, doc):
+ '''"language" must be a lowercase ISO-639 two-letter code.'''
+
+ language = _check_meta(filename, doc, 'language')
+ _check(language in ISO_LANGUAGE, 'Unknown language')
+
+
+@rule
+def workshop_check_humandate(filename, doc):
+ '''"humandate" must be a human-readable date with a 3-letter month and
+ 4-digit year. Examples include "Feb 18-20, 2025" and "Feb 18 and
+ 20, 2025". It may be in languages other than English, but the
+ month name should be kept short to aid formatting of the main
+ Software Carpentry web site.'''
+
+ humandate = _check_meta(filename, doc, 'humandate')
+ if _check(',' in humandate, 'Require comma in human date'):
+ month_dates, year = humandate.split(",")
+
+ # The first three characters of month_dates are not empty
+ month = month_dates[:3]
+ _check(not any(char == " " for char in month), 'Cannot be spaces in month')
+
+ # But the fourth character is empty ("February" is illegal)
+ require(month_dates[3] == " ", 'Month names must be three letters long')
+
+ # Year must contain only digits.
+ _check_regexp('\d+', year, 'Year must be only digits')
+
+
+@rule
+def workshop_check_humantime(filename, doc):
+ '''"humantime" is a human-readable start and end time for the workshop,
+ such as "09:00 - 16:00".'''
+
+ time = _check_meta(filename, doc, 'humantime')
+ if time:
+ _check_regexp(HUMANTIME_PATTERN, time.replace(" ", ""), 'Badly-formatted human time')
+
+
+@rule
+def workshop_check_startdate(filename, doc):
+ '''"startdate" must be machine-readable start date for the workshop,
+ and must be in YYYY-MM-DD format, e.g., "2015-07-01".'''
+
+ startdate = _check_meta(filename, doc, 'startdate')
+ try:
+ startdate = dateutil.parser.parse(startdate)
+ except ValueError as e:
+ _check(False, 'Badly-formatted start date')
+
+
+@rule
+def workshop_check_enddate(filename, doc):
+ '''"enddate" must be machine-readable end date for the workshop,
+ and must be in YYYY-MM-DD format, e.g., "2015-07-01".'''
+
+ enddate = _check_meta(filename, doc, 'enddate')
+ try:
+ enddate = dateutil.parser.parse(enddate)
+ except ValueError as e:
+ _check(False, 'Badly-formatted end date')
+
+
+@rule
+def workshop_check_latitude_longitude(filename, doc):
+ '''"latlng" must be a valid latitude and longitude represented as two
+ floating-point numbers separated by a comma.'''
+
+ latlng = _check_meta(filename, doc, 'latlng')
+ try:
+ lat, lng = latlng.split(',')
+ lat = float(lat)
+ long = float(lng)
+ _check((-90.0 <= lat <= 90.0) and (-180.0 <= long <= 180.0),
+ 'Invalid numeric values for latitude/longitude')
+ except ValueError:
+ _check(False, 'Unable to parse lat/long')
+
+
+@rule
+def workshop_check_instructors(filename, doc):
+ '''"instructor" must be a non-empty comma-separated list of quoted names,
+ e.g. ['First name', 'Second name', ...']. Do not use "TBD" or other
+ placeholders.'''
+
+ pass # FIXME
-def _check_1(filename, doc, rulename, xpath):
+@rule
+def workshop_check_helpers(filename, doc):
+ '''"helper" must be a comma-separated list of quoted names,
+ e.g. ['First name', 'Second name', ...']. The list may be empty. Do
+ not use "TBD" or other placeholders.'''
+
+ pass # FIXME
+
+
+@rule
+def workshop_check_contact(filename, doc):
+ '''"contact" must be a valid email address consisting of characters, a
+ @, and more characters. It should not be the default contact
+ email address "admin@software-carpentry.org".'''
+
+ contact = _check_meta(filename, doc, 'contact')
+ _check_regexp(EMAIL_PATTERN, contact, 'Invalid contact email')
+ _check(contact != DEFAULT_CONTACT_EMAIL, 'Cannot use default contact email')
+
+
+@rule
+def workshop_check_eventbrite(filename, doc):
+ '''The Eventbrite registration key must be 9 or more digits.'''
+
+ eventbrite = _check_meta(filename, doc, 'eventbrite')
+ _check_regexp(EVENTBRITE_PATTERN, eventbrite, 'Eventbrite key must be 9 or more digits')
+
+
+@rule
+def workshop_check_etherpad(filename, doc):
+ '''Etherpad must be a valid URL.'''
+
+ etherpad = _check_meta(filename, doc, 'etherpad')
+ _check_regexp(URL_PATTERN, etherpad, 'Etherpad address must be a valid URL')
+
+
+def _check_meta(filename, doc, metaname):
+ '''Check one metadata attribute.'''
+
+ xpath = '//html/head/meta[@name="{0}"]'.format(metaname)
+ content = _check_one_element(filename, doc, metaname, xpath, attribute='content')
+ return content
+
+
+def _check_one_element(filename, doc, rulename, xpath, attribute=None):
'''Check that an equality holds.'''
+ result = None
actual = doc.xpath(xpath)
if len(actual) != 1:
- print('In {0}, checking {1}: expected 1 match, got {2}'.format(filename, rulename, len(actual)))
+ MESSAGES.append('In {0}, checking {1}: expected 1 match, got {2}'.format(filename, rulename, len(actual)))
+ elif attribute is not None:
+ result = actual[0].attrib.get(attribute, None)
+ return result
+
+
+def _check_regexp(pattern, value, message):
+ '''Check a regular expression match if the value is a string.'''
+ _check(value is not None, message + ': value is None') and \
+ _check(re.match(pattern, value), message)
+
+
+def _check(condition, message):
+ '''Record error message if condition not met, returning condition for chaining.'''
+
+ if not condition:
+ MESSAGES.append(message)
+ return condition
def _require(condition, message):
'''Fail if condition not met.'''