diff --git a/_layouts/workshop_homepage.html b/_layouts/workshop_homepage.html index ab3cf74a25adbfe9dd947118aeaedd6366db9855..95f4fde7b1365c0842a85434c540d0b79c0125c9 100644 --- a/_layouts/workshop_homepage.html +++ b/_layouts/workshop_homepage.html @@ -7,6 +7,7 @@ <meta name="slug" content="{{site.github.project_title}}" /> <meta name="startdate" content="{{page.startdate}}" /> <meta name="enddate" content="{{page.enddate}}" /> + <meta name="humandate" content="{{page.humandate}}" /> <meta name="country" content="{{page.country}}" /> <meta name="venue" content="{{page.venue}}" /> <meta name="address" content="{{page.address}}" /> diff --git a/bin/validator b/bin/validator index 2c5ad24abe4fcbc8b3291bd13edf3bd2f6922f66..b43af6bb07ff16f94f594e22905dd754d6e42773 100755 --- a/bin/validator +++ b/bin/validator @@ -9,27 +9,104 @@ import sys import os import glob import fnmatch +import re import yaml from optparse import OptionParser from bs4 import BeautifulSoup from lxml import etree +import dateutil.parser # Default lesson configuration. LESSON_CONFIG = '''\ patterns: '*.html': - - has_title_in_head - - has_navbar - - has_title_in_body - - has_footer + - lesson_has_title_in_head + - lesson_has_navbar + - lesson_has_title_in_body + - lesson_has_footer index.html: - - has_prereq - - has_syllabus + - lesson_has_prereq + - lesson_has_syllabus '*-*/index.html': - - has_objectives + - lesson_has_objectives ''' +# Default workshop configuration. +WORKSHOP_CONFIG = '''\ +patterns: + 'index.html': + - workshop_check_slug + - workshop_check_country + - workshop_check_language + - workshop_check_humandate + - workshop_check_humantime + - workshop_check_startdate + - workshop_check_enddate + - workshop_check_latitude_longitude + - workshop_check_instructors + - workshop_check_helpers + - workshop_check_contact + - workshop_check_eventbrite + - workshop_check_etherpad +''' + + +# Regular expression patterns for workshops. 
+SLUG_PATTERN = r'.+' +EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+' +HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)' +EVENTBRITE_PATTERN = r'\d{9,10}' +URL_PATTERN = r'https?://.+' + +DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org' + +# Country and language codes. Note that codes mean different things: 'ar' +# is 'Arabic' as a language but 'Argentina' as a country. + +ISO_COUNTRY = [ + 'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as', + 'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh', + 'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz', + 'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co', + 'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz', + 'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm', + 'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm', + 'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn', + 'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is', + 'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp', + 'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt', + 'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm', + 'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my', + 'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu', + 'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr', + 'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb', + 'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so', + 'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk', + 'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um', + 'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws', + 'ye', 'yt', 'za', 'zm', 
'zw' +] + +ISO_LANGUAGE = [ + 'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az', + 'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce', + 'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee', + 'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr', + 'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr', + 'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is', + 'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn', + 'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln', + 'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms', + 'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv', + 'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu', + 'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk', + 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta', + 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw', + 'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi', + 'yo', 'za', 'zh', 'zu' +] + # Record all the rules. RULES = {} @@ -38,6 +115,9 @@ def rule(fn): return fn +# Accumulate error messages. 
+MESSAGES = []
+
 def main():
     '''Main driver: check all files with all rules that apply.'''
 
@@ -56,6 +136,8 @@ def main():
                     if args.verbose > 1:
                         print('...', rule, file=sys.stderr)
                     RULES[rule](filename, docs[filename])
+    for m in MESSAGES:
+        print(m)
 
 
 def parse_args():
@@ -104,6 +186,8 @@ def read_config(args):
             args.config = yaml.load(reader)
     elif args.check_lesson:
         args.config = yaml.load(LESSON_CONFIG)
+    elif args.check_workshop:
+        args.config = yaml.load(WORKSHOP_CONFIG)
     else:
         assert False, 'Do not know what configuration to load'
 
@@ -133,63 +217,235 @@ def read_all_docs(source_dir):
 
 
 @rule
-def has_footer(filename, doc):
+def lesson_has_footer(filename, doc):
     '''Document has footer element.'''
 
-    _check_1(filename, doc, 'footers', '//footer')
+    _check_one_element(filename, doc, 'footers', '//footer')
 
 
 @rule
-def has_navbar(filename, doc):
+def lesson_has_navbar(filename, doc):
     '''Document has header element.'''
 
-    _check_1(filename, doc, 'div navbar', '//div[@class="navbar-header"]')
+    _check_one_element(filename, doc, 'div navbar', '//div[@class="navbar-header"]')
 
 
 @rule
-def has_objectives(filename, doc):
+def lesson_has_objectives(filename, doc):
     '''Episode has objectives.'''
 
-    _check_1(filename, doc, 'objectives div', '//blockquote[@class="objectives"]')
+    _check_one_element(filename, doc, 'objectives div', '//blockquote[@class="objectives"]')
 
 
 @rule
-def has_prereq(filename, doc):
+def lesson_has_prereq(filename, doc):
     '''Index page has prerequisites block.'''
 
-    _check_1(filename, doc, 'prerequisites blockquote', '//blockquote[@class="prereq"]')
+    _check_one_element(filename, doc, 'prerequisites blockquote', '//blockquote[@class="prereq"]')
 
 
 @rule
-def has_syllabus(filename, doc):
+def lesson_has_syllabus(filename, doc):
     '''Index page has syllabus.'''
 
-    _check_1(filename, doc, 'syllabus', '//div[@class="syllabus"]')
-    _check_1(filename, doc, 'syllabus title', '//div[@class="syllabus"]/h2')
-    _check_1(filename, doc, 'syllabus table', '//div[@class="syllabus"]/table')
+    _check_one_element(filename, doc, 'syllabus', '//div[@class="syllabus"]')
+    _check_one_element(filename, doc, 'syllabus title', '//div[@class="syllabus"]/h2')
+    _check_one_element(filename, doc, 'syllabus table', '//div[@class="syllabus"]/table')
 
 
 @rule
-def has_title_in_head(filename, doc):
+def lesson_has_title_in_head(filename, doc):
     '''Document has a title in the head.'''
 
-    _check_1(filename, doc, 'title in head', '//head//title')
+    _check_one_element(filename, doc, 'title in head', '//head//title')
 
 
 @rule
-def has_title_in_body(filename, doc):
+def lesson_has_title_in_body(filename, doc):
     '''Document has a title in the body.'''
 
-    _check_1(filename, doc, 'title in body', '//body//h1[@class="maintitle"]')
+    _check_one_element(filename, doc, 'title in body', '//body//h1[@class="maintitle"]')
+
+
+@rule
+def workshop_check_slug(filename, doc):
+    '''"slug" must be present and match SLUG_PATTERN (i.e. be non-empty).'''
+
+    content = _check_meta(filename, doc, 'slug')
+    _check_regexp(SLUG_PATTERN, content, 'invalid slug')
+
+
+@rule
+def workshop_check_country(filename, doc):
+    '''"country" must be a lowercase ISO-3166 two-letter code.'''
+
+    country = _check_meta(filename, doc, 'country')
+    _check(country in ISO_COUNTRY, 'Unknown country')
+
+
+@rule
+def workshop_check_language(filename, doc):
+    '''"language" must be a lowercase ISO-639 two-letter code.'''
+
+    language = _check_meta(filename, doc, 'language')
+    _check(language in ISO_LANGUAGE, 'Unknown language')
+
+
+@rule
+def workshop_check_humandate(filename, doc):
+    '''"humandate" must be a human-readable date with a 3-letter month and
+    4-digit year.  Examples include "Feb 18-20, 2025" and "Feb 18 and
+    20, 2025".  It may be in languages other than English, but the
+    month name should be kept short to aid formatting of the main
+    Software Carpentry web site.'''
+
+    humandate = _check_meta(filename, doc, 'humandate')
+    if _check(humandate is not None and ',' in humandate, 'Require comma in human date'):
+        # Split on the last comma only, so extra commas in the day list cannot break unpacking.
+        month_dates, year = humandate.rsplit(',', 1)
+
+        # The first three characters of month_dates are not empty
+        month = month_dates[:3]
+        _check(not any(char == " " for char in month), 'Cannot be spaces in month')
+
+        # But the fourth character is empty ("February" is illegal)
+        _check(month_dates[3:4] == " ", 'Month names must be three letters long')
+
+        # Year must contain only digits.
+        _check_regexp(r'\d+$', year.strip(), 'Year must be only digits')
+
+
+@rule
+def workshop_check_humantime(filename, doc):
+    '''"humantime" is a human-readable start and end time for the workshop,
+    such as "09:00 - 16:00".'''
+
+    time = _check_meta(filename, doc, 'humantime')
+    if time:
+        _check_regexp(HUMANTIME_PATTERN, time.replace(" ", ""), 'Badly-formatted human time')
+
+
+@rule
+def workshop_check_startdate(filename, doc):
+    '''"startdate" must be machine-readable start date for the workshop,
+    and must be in YYYY-MM-DD format, e.g., "2015-07-01".'''
+
+    startdate = _check_meta(filename, doc, 'startdate')
+    if _check(startdate is not None, 'Badly-formatted start date: value is None'):
+        try:
+            dateutil.parser.parse(startdate)
+        except (ValueError, OverflowError):
+            _check(False, 'Badly-formatted start date')
+
+
+@rule
+def workshop_check_enddate(filename, doc):
+    '''"enddate" must be machine-readable end date for the workshop,
+    and must be in YYYY-MM-DD format, e.g., "2015-07-01".'''
+
+    enddate = _check_meta(filename, doc, 'enddate')
+    if _check(enddate is not None, 'Badly-formatted end date: value is None'):
+        try:
+            dateutil.parser.parse(enddate)
+        except (ValueError, OverflowError):
+            _check(False, 'Badly-formatted end date')
+
+
+@rule
+def workshop_check_latitude_longitude(filename, doc):
+    '''"latlng" must be a valid latitude and longitude represented as two
+    floating-point numbers separated by a comma.'''
+
+    latlng = _check_meta(filename, doc, 'latlng')
+    if _check(latlng is not None, 'Unable to parse lat/long: value is None'):
+        try:
+            lat, lng = latlng.split(',')
+            lat = float(lat)
+            lng = float(lng)
+            _check((-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0),
+                   'Invalid numeric values for latitude/longitude')
+        except ValueError:
+            _check(False, 'Unable to parse lat/long')
+
+
+@rule
+def workshop_check_instructors(filename, doc):
+    '''"instructor" must be a non-empty comma-separated list of quoted names,
+    e.g. ['First name', 'Second name', ...].  Do not use "TBD" or other
+    placeholders.'''
+
+    pass  # FIXME
 
 
-def _check_1(filename, doc, rulename, xpath):
+@rule
+def workshop_check_helpers(filename, doc):
+    '''"helper" must be a comma-separated list of quoted names,
+    e.g. ['First name', 'Second name', ...].  The list may be empty.  Do
+    not use "TBD" or other placeholders.'''
+
+    pass  # FIXME
+
+
+@rule
+def workshop_check_contact(filename, doc):
+    '''"contact" must be a valid email address consisting of characters, a
+    @, and more characters.  It should not be the default contact
+    email address "admin@software-carpentry.org".'''
+
+    contact = _check_meta(filename, doc, 'contact')
+    _check_regexp(EMAIL_PATTERN, contact, 'Invalid contact email')
+    _check(contact != DEFAULT_CONTACT_EMAIL, 'Cannot use default contact email')
+
+
+@rule
+def workshop_check_eventbrite(filename, doc):
+    '''The Eventbrite registration key must be 9 or more digits.'''
+
+    eventbrite = _check_meta(filename, doc, 'eventbrite')
+    _check_regexp(EVENTBRITE_PATTERN, eventbrite, 'Eventbrite key must be 9 or more digits')
+
+
+@rule
+def workshop_check_etherpad(filename, doc):
+    '''Etherpad must be a valid URL.'''
+
+    etherpad = _check_meta(filename, doc, 'etherpad')
+    _check_regexp(URL_PATTERN, etherpad, 'Etherpad address must be a valid URL')
+
+
+def _check_meta(filename, doc, metaname):
+    '''Check one metadata attribute.'''
+
+    xpath = '//html/head/meta[@name="{0}"]'.format(metaname)
+    content = _check_one_element(filename, doc, metaname, xpath, attribute='content')
+    return content
+
+
+def _check_one_element(filename, doc, rulename, xpath, attribute=None):
-    '''Check that an equality holds.'''
+    '''Check that exactly one element matches xpath; return its attribute if one is requested.'''
 
+    result = None
     actual = doc.xpath(xpath)
     if len(actual) != 1:
-        print('In {0}, checking {1}: expected 1 match, got {2}'.format(filename, rulename, len(actual)))
+        MESSAGES.append('In {0}, checking {1}: expected 1 match, got {2}'.format(filename, rulename, len(actual)))
+    elif attribute is not None:
+        result = actual[0].attrib.get(attribute, None)
+    return result
+
+
+def _check_regexp(pattern, value, message):
+    '''Check a regular expression match if the value is a string.'''
+    _check(value is not None, message + ': value is None') and \
+        _check(re.match(pattern, value), message)
+
+
+def _check(condition, message):
+    '''Record error message if condition not met, returning condition for chaining.'''
+
+    if not condition:
+        MESSAGES.append(message)
+    return condition
 
 
 def _require(condition, message):
     '''Fail if condition not met.'''