Merge branch 'gh-pages' of github.com:gvwilson/new-template into gh-pages

18b3bfdc · Greg Wilson · 269239bb · 08f1537e · 18b3bfdc · 18b3bfdc
Commit 18b3bfdc authored 8 years ago by Greg Wilson
--- a/_layouts/workshop_homepage.html
+++ b/_layouts/workshop_homepage.html
@@ -7,6 +7,7 @@
    <meta name="slug" content="{{site.github.project_title}}" />
    <meta name="startdate" content="{{page.startdate}}" />
    <meta name="enddate" content="{{page.enddate}}" />
+    <meta name="humandate" content="{{page.humandate}}" />
    <meta name="country" content="{{page.country}}" />
    <meta name="venue" content="{{page.venue}}" />
    <meta name="address" content="{{page.address}}" />

--- a/bin/validator
+++ b/bin/validator
@@ -9,27 +9,104 @@ import sys
 import os
 import glob
 import fnmatch
+import re
 import yaml
 from optparse import OptionParser
 from bs4 import BeautifulSoup
 from lxml import etree
+import dateutil.parser


 # Default lesson configuration.
 LESSON_CONFIG = '''\
 patterns:
  '*.html':
-    - has_title_in_head
-    - has_navbar
-    - has_title_in_body
-    - has_footer
+    - lesson_has_title_in_head
+    - lesson_has_navbar
+    - lesson_has_title_in_body
+    - lesson_has_footer
  index.html:
-    - has_prereq
-    - has_syllabus
+    - lesson_has_prereq
+    - lesson_has_syllabus
  '*-*/index.html':
-    - has_objectives
+    - lesson_has_objectives
 '''

+# Default workshop configuration, loaded by read_config() when
+# args.check_workshop is set.  Each entry under 'index.html' is the name of
+# a function registered with @rule further down in this file.
+WORKSHOP_CONFIG = '''\
+patterns:
+  'index.html':
+    - workshop_check_slug
+    - workshop_check_country
+    - workshop_check_language
+    - workshop_check_humandate
+    - workshop_check_humantime
+    - workshop_check_startdate
+    - workshop_check_enddate
+    - workshop_check_latitude_longitude
+    - workshop_check_instructors
+    - workshop_check_helpers
+    - workshop_check_contact
+    - workshop_check_eventbrite
+    - workshop_check_etherpad
+'''
+
+
+# Regular expression patterns for workshops.  They are applied with
+# re.match() in _check_regexp(), so each one is anchored at the start of
+# the value only; trailing characters after a match are not rejected.
+# Slug: any non-empty value.
+SLUG_PATTERN = r'.+'
+# Email: something, an '@', something, a '.', something (no further '@').
+EMAIL_PATTERN = r'[^@]+@[^@]+\.[^@]+'
+# Human time: either a 12-hour range with am/pm on both ends or a 24-hour
+# range, with '-' or 'to' between start and end and no spaces.
+HUMANTIME_PATTERN = r'((0?[1-9]|1[0-2]):[0-5]\d(am|pm)(-|to)(0?[1-9]|1[0-2]):[0-5]\d(am|pm))|((0?\d|1\d|2[0-3]):[0-5]\d(-|to)(0?\d|1\d|2[0-3]):[0-5]\d)'
+# Eventbrite key: at least nine leading digits.
+EVENTBRITE_PATTERN = r'\d{9,10}'
+# URL: http:// or https:// followed by at least one character.
+URL_PATTERN = r'https?://.+'
+
+# Placeholder address that workshop_check_contact() refuses to accept.
+DEFAULT_CONTACT_EMAIL = 'admin@software-carpentry.org'
+
+# Country and language codes.  Note that the same two-letter code can mean
+# different things in each list: 'ar' is 'Arabic' as a language but
+# 'Argentina' as a country.
+
+# Lowercase two-letter codes accepted by workshop_check_country() for the
+# "country" metadata.
+ISO_COUNTRY = [
+    'ad', 'ae', 'af', 'ag', 'ai', 'al', 'am', 'an', 'ao', 'aq', 'ar', 'as',
+    'at', 'au', 'aw', 'ax', 'az', 'ba', 'bb', 'bd', 'be', 'bf', 'bg', 'bh',
+    'bi', 'bj', 'bm', 'bn', 'bo', 'br', 'bs', 'bt', 'bv', 'bw', 'by', 'bz',
+    'ca', 'cc', 'cd', 'cf', 'cg', 'ch', 'ci', 'ck', 'cl', 'cm', 'cn', 'co',
+    'cr', 'cu', 'cv', 'cx', 'cy', 'cz', 'de', 'dj', 'dk', 'dm', 'do', 'dz',
+    'ec', 'ee', 'eg', 'eh', 'er', 'es', 'et', 'eu', 'fi', 'fj', 'fk', 'fm',
+    'fo', 'fr', 'ga', 'gb', 'gd', 'ge', 'gf', 'gg', 'gh', 'gi', 'gl', 'gm',
+    'gn', 'gp', 'gq', 'gr', 'gs', 'gt', 'gu', 'gw', 'gy', 'hk', 'hm', 'hn',
+    'hr', 'ht', 'hu', 'id', 'ie', 'il', 'im', 'in', 'io', 'iq', 'ir', 'is',
+    'it', 'je', 'jm', 'jo', 'jp', 'ke', 'kg', 'kh', 'ki', 'km', 'kn', 'kp',
+    'kr', 'kw', 'ky', 'kz', 'la', 'lb', 'lc', 'li', 'lk', 'lr', 'ls', 'lt',
+    'lu', 'lv', 'ly', 'ma', 'mc', 'md', 'me', 'mg', 'mh', 'mk', 'ml', 'mm',
+    'mn', 'mo', 'mp', 'mq', 'mr', 'ms', 'mt', 'mu', 'mv', 'mw', 'mx', 'my',
+    'mz', 'na', 'nc', 'ne', 'nf', 'ng', 'ni', 'nl', 'no', 'np', 'nr', 'nu',
+    'nz', 'om', 'pa', 'pe', 'pf', 'pg', 'ph', 'pk', 'pl', 'pm', 'pn', 'pr',
+    'ps', 'pt', 'pw', 'py', 'qa', 're', 'ro', 'rs', 'ru', 'rw', 'sa', 'sb',
+    'sc', 'sd', 'se', 'sg', 'sh', 'si', 'sj', 'sk', 'sl', 'sm', 'sn', 'so',
+    'sr', 'st', 'sv', 'sy', 'sz', 'tc', 'td', 'tf', 'tg', 'th', 'tj', 'tk',
+    'tl', 'tm', 'tn', 'to', 'tr', 'tt', 'tv', 'tw', 'tz', 'ua', 'ug', 'um',
+    'us', 'uy', 'uz', 'va', 'vc', 've', 'vg', 'vi', 'vn', 'vu', 'wf', 'ws',
+    'ye', 'yt', 'za', 'zm', 'zw'
+]
+
+# Lowercase two-letter codes accepted by workshop_check_language() for the
+# "language" metadata.
+ISO_LANGUAGE = [
+    'aa', 'ab', 'ae', 'af', 'ak', 'am', 'an', 'ar', 'as', 'av', 'ay', 'az',
+    'ba', 'be', 'bg', 'bh', 'bi', 'bm', 'bn', 'bo', 'br', 'bs', 'ca', 'ce',
+    'ch', 'co', 'cr', 'cs', 'cu', 'cv', 'cy', 'da', 'de', 'dv', 'dz', 'ee',
+    'el', 'en', 'eo', 'es', 'et', 'eu', 'fa', 'ff', 'fi', 'fj', 'fo', 'fr',
+    'fy', 'ga', 'gd', 'gl', 'gn', 'gu', 'gv', 'ha', 'he', 'hi', 'ho', 'hr',
+    'ht', 'hu', 'hy', 'hz', 'ia', 'id', 'ie', 'ig', 'ii', 'ik', 'io', 'is',
+    'it', 'iu', 'ja', 'jv', 'ka', 'kg', 'ki', 'kj', 'kk', 'kl', 'km', 'kn',
+    'ko', 'kr', 'ks', 'ku', 'kv', 'kw', 'ky', 'la', 'lb', 'lg', 'li', 'ln',
+    'lo', 'lt', 'lu', 'lv', 'mg', 'mh', 'mi', 'mk', 'ml', 'mn', 'mr', 'ms',
+    'mt', 'my', 'na', 'nb', 'nd', 'ne', 'ng', 'nl', 'nn', 'no', 'nr', 'nv',
+    'ny', 'oc', 'oj', 'om', 'or', 'os', 'pa', 'pi', 'pl', 'ps', 'pt', 'qu',
+    'rm', 'rn', 'ro', 'ru', 'rw', 'sa', 'sc', 'sd', 'se', 'sg', 'si', 'sk',
+    'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'ss', 'st', 'su', 'sv', 'sw', 'ta',
+    'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tn', 'to', 'tr', 'ts', 'tt', 'tw',
+    'ty', 'ug', 'uk', 'ur', 'uz', 've', 'vi', 'vo', 'wa', 'wo', 'xh', 'yi',
+    'yo', 'za', 'zh', 'zu'
+]
+

 # Record all the rules.
 RULES = {}
@@ -38,6 +115,9 @@ def rule(fn):
    return fn


+# Accumulate error messages.  The _check* helpers append to this list and
+# main() prints every entry once all rules have run.
+MESSAGES = []
+
 def main():
    '''Main driver: check all files with all rules that apply.'''

@@ -56,6 +136,8 @@ def main():
                    if args.verbose > 1:
                        print('...', rule, file=sys.stderr)
                    RULES[rule](filename, docs[filename])
+    for m in MESSAGES:
+        print(m)


 def parse_args():
@@ -104,6 +186,8 @@ def read_config(args):
            args.config = yaml.load(reader)
    elif args.check_lesson:
        args.config = yaml.load(LESSON_CONFIG)
+    elif args.check_workshop:
+        args.config = yaml.load(WORKSHOP_CONFIG)
    else:
        assert False, 'Do not know what configuration to load'

@@ -133,63 +217,229 @@ def read_all_docs(source_dir):


 @rule
-def has_footer(filename, doc):
+def lesson_has_footer(filename, doc):
    '''Document has footer element.'''

-    _check_1(filename, doc, 'footers', '//footer')
+    _check_one_element(filename, doc, 'footers', '//footer')


 @rule
-def has_navbar(filename, doc):
+def lesson_has_navbar(filename, doc):
    '''Document has header element.'''

-    _check_1(filename, doc, 'div navbar', '//div[@class="navbar-header"]')
+    _check_one_element(filename, doc, 'div navbar', '//div[@class="navbar-header"]')


 @rule
-def has_objectives(filename, doc):
+def lesson_has_objectives(filename, doc):
    '''Episode has objectives.'''

-    _check_1(filename, doc, 'objectives div', '//blockquote[@class="objectives"]')
+    _check_one_element(filename, doc, 'objectives div', '//blockquote[@class="objectives"]')


 @rule
-def has_prereq(filename, doc):
+def lesson_has_prereq(filename, doc):
    '''Index page has prerequisites block.'''

-    _check_1(filename, doc, 'prerequisites blockquote', '//blockquote[@class="prereq"]')
+    _check_one_element(filename, doc, 'prerequisites blockquote', '//blockquote[@class="prereq"]')


 @rule
-def has_syllabus(filename, doc):
+def lesson_has_syllabus(filename, doc):
    '''Index page has syllabus.'''

-    _check_1(filename, doc, 'syllabus', '//div[@class="syllabus"]')
-    _check_1(filename, doc, 'syllabus title', '//div[@class="syllabus"]/h2')
-    _check_1(filename, doc, 'syllabus table', '//div[@class="syllabus"]/table')
+    _check_one_element(filename, doc, 'syllabus', '//div[@class="syllabus"]')
+    _check_one_element(filename, doc, 'syllabus title', '//div[@class="syllabus"]/h2')
+    _check_one_element(filename, doc, 'syllabus table', '//div[@class="syllabus"]/table')


 @rule
-def has_title_in_head(filename, doc):
+def lesson_has_title_in_head(filename, doc):
    '''Document has a title in the head.'''

-    _check_1(filename, doc, 'title in head', '//head//title')
+    _check_one_element(filename, doc, 'title in head', '//head//title')


 @rule
-def has_title_in_body(filename, doc):
+def lesson_has_title_in_body(filename, doc):
    '''Document has a title in the body.'''

-    _check_1(filename, doc, 'title in body', '//body//h1[@class="maintitle"]')
+    _check_one_element(filename, doc, 'title in body', '//body//h1[@class="maintitle"]')
+
+
+@rule
+def workshop_check_slug(filename, doc):
+    '''"slug" metadata must be present and must match SLUG_PATTERN.'''
+
+    slug = _check_meta(filename, doc, 'slug')
+    _check_regexp(SLUG_PATTERN, slug, 'invalid slug')
+
+
+@rule
+def workshop_check_country(filename, doc):
+    '''Record an error unless the "country" metadata is one of the
+    lowercase two-letter codes listed in ISO_COUNTRY.'''
+
+    code = _check_meta(filename, doc, 'country')
+    is_known = code in ISO_COUNTRY
+    _check(is_known, 'Unknown country')
+
+
+@rule
+def workshop_check_language(filename, doc):
+    '''Record an error unless the "language" metadata is one of the
+    lowercase two-letter codes listed in ISO_LANGUAGE.'''
+
+    code = _check_meta(filename, doc, 'language')
+    is_known = code in ISO_LANGUAGE
+    _check(is_known, 'Unknown language')
+
+
+@rule
+def workshop_check_humandate(filename, doc):
+    '''"humandate" must be a human-readable date with a 3-letter month and
+    a year made only of digits.  Examples include "Feb 18-20, 2025" and
+    "Feb 18 and 20, 2025".  It may be in languages other than English, but
+    the month name should be kept short to aid formatting of the main
+    Software Carpentry web site.
+
+    Every problem found is recorded via _check(); nothing is raised.'''
+
+    humandate = _check_meta(filename, doc, 'humandate')
+    # A missing value would make the string operations below raise.
+    if not _check(humandate is not None, 'Human date: value is None'):
+        return
+    if not _check(',' in humandate, 'Require comma in human date'):
+        return
+
+    # Split on the last comma only, so a date list that itself contains
+    # commas ("Feb 18, 20, 2025") still yields the year as the final part.
+    month_dates, year = humandate.rsplit(',', 1)
+
+    # The first three characters of month_dates are not empty
+    month = month_dates[:3]
+    _check(' ' not in month, 'Cannot be spaces in month')
+
+    # But the fourth character is empty ("February" is illegal).  Slicing
+    # instead of indexing keeps a too-short value from raising IndexError.
+    _check(month_dates[3:4] == ' ', 'Month names must be three letters long')
+
+    # Year must contain only digits.  The text after the comma normally
+    # starts with a space, so strip it before matching, and anchor the end
+    # because re.match() only anchors the start.
+    _check_regexp(r'\d+$', year.strip(), 'Year must be only digits')
+
+
+@rule
+def workshop_check_humantime(filename, doc):
+    '''"humantime" is a human-readable start and end time for the workshop,
+    such as "09:00 - 16:00".  Spaces are ignored when matching; a missing
+    or empty value is not checked here.'''
+
+    humantime = _check_meta(filename, doc, 'humantime')
+    if not humantime:
+        return
+    compact = humantime.replace(" ", "")
+    _check_regexp(HUMANTIME_PATTERN, compact, 'Badly-formatted human time')
+
+
+@rule
+def workshop_check_startdate(filename, doc):
+    '''"startdate" must be machine-readable start date for the workshop,
+    and must be in YYYY-MM-DD format, e.g., "2015-07-01".
+
+    An error is recorded when the value is missing or cannot be parsed as
+    a date.'''
+
+    # NOTE(review): dateutil accepts many layouts besides YYYY-MM-DD, so
+    # this only proves the value is *a* date -- confirm whether the stricter
+    # format should be enforced.
+    startdate = _check_meta(filename, doc, 'startdate')
+    # Passing None to the parser raises instead of reporting an error.
+    if not _check(startdate is not None,
+                  'Badly-formatted start date: value is None'):
+        return
+    try:
+        dateutil.parser.parse(startdate)
+    except (ValueError, OverflowError):
+        _check(False, 'Badly-formatted start date')
+
+
+@rule
+def workshop_check_enddate(filename, doc):
+    '''"enddate" must be machine-readable end date for the workshop,
+    and must be in YYYY-MM-DD format, e.g., "2015-07-01".
+
+    An error is recorded when the value is missing or cannot be parsed as
+    a date.'''
+
+    # NOTE(review): dateutil accepts many layouts besides YYYY-MM-DD, so
+    # this only proves the value is *a* date -- confirm whether the stricter
+    # format should be enforced.
+    enddate = _check_meta(filename, doc, 'enddate')
+    # Passing None to the parser raises instead of reporting an error.
+    if not _check(enddate is not None,
+                  'Badly-formatted end date: value is None'):
+        return
+    try:
+        dateutil.parser.parse(enddate)
+    except (ValueError, OverflowError):
+        _check(False, 'Badly-formatted end date')
+
+
+@rule
+def workshop_check_latitude_longitude(filename, doc):
+    '''"latlng" must be a valid latitude and longitude represented as two
+    floating-point numbers separated by a comma.
+
+    Latitude must lie in [-90, 90] and longitude in [-180, 180].  Every
+    problem is recorded via _check(); nothing is raised.'''
+
+    latlng = _check_meta(filename, doc, 'latlng')
+    # A missing value has no split() method; report it instead of crashing.
+    if not _check(latlng is not None, 'Unable to parse lat/long: value is None'):
+        return
+    try:
+        # Unpacking raises ValueError for the wrong number of parts, and
+        # float() raises ValueError for non-numeric text.
+        lat, lng = (float(part) for part in latlng.split(','))
+    except ValueError:
+        _check(False, 'Unable to parse lat/long')
+        return
+    _check((-90.0 <= lat <= 90.0) and (-180.0 <= lng <= 180.0),
+           'Invalid numeric values for latitude/longitude')
+
+
+@rule
+def workshop_check_instructors(filename, doc):
+    '''"instructor" must be a non-empty comma-separated list of quoted names,
+    e.g. ['First name', 'Second name', ...].  Do not use "TBD" or other
+    placeholders.
+
+    Not implemented yet: this rule currently performs no check.'''
+
+    pass # FIXME: validate the "instructor" metadata as described above


-def _check_1(filename, doc, rulename, xpath):
+@rule
+def workshop_check_helpers(filename, doc):
+    '''"helper" must be a comma-separated list of quoted names,
+    e.g. ['First name', 'Second name', ...].  The list may be empty.  Do
+    not use "TBD" or other placeholders.
+
+    Not implemented yet: this rule currently performs no check.'''
+
+    pass # FIXME: validate the "helper" metadata as described above
+
+
+@rule
+def workshop_check_contact(filename, doc):
+    '''"contact" must look like an email address (text, an @, more text
+    containing a dot) and must not be the default contact address
+    "admin@software-carpentry.org".'''
+
+    address = _check_meta(filename, doc, 'contact')
+    _check_regexp(EMAIL_PATTERN, address, 'Invalid contact email')
+    is_default = (address == DEFAULT_CONTACT_EMAIL)
+    _check(not is_default, 'Cannot use default contact email')
+
+
+@rule
+def workshop_check_eventbrite(filename, doc):
+    '''The "eventbrite" registration key must start with at least nine
+    digits.'''
+
+    key = _check_meta(filename, doc, 'eventbrite')
+    _check_regexp(EVENTBRITE_PATTERN, key, 'Eventbrite key must be 9 or more digits')
+
+
+@rule
+def workshop_check_etherpad(filename, doc):
+    '''The "etherpad" metadata must be an http:// or https:// URL.'''
+
+    url = _check_meta(filename, doc, 'etherpad')
+    _check_regexp(URL_PATTERN, url, 'Etherpad address must be a valid URL')
+
+
+def _check_meta(filename, doc, metaname):
+    '''Look up the <meta name=metaname> element in the document head and
+    return whatever _check_one_element() reports for its "content"
+    attribute.'''
+
+    return _check_one_element(
+        filename, doc, metaname,
+        '//html/head/meta[@name="{0}"]'.format(metaname),
+        attribute='content')
+
+
+def _check_one_element(filename, doc, rulename, xpath, attribute=None):
    '''Check that an equality holds.'''

+    result = None
    actual = doc.xpath(xpath)
    if len(actual) != 1:
-        print('In {0}, checking {1}: expected 1 match, got {2}'.format(filename, rulename, len(actual)))
+        MESSAGES.append('In {0}, checking {1}: expected 1 match, got {2}'.format(filename, rulename, len(actual)))
+    elif attribute is not None:
+        result = actual[0].attrib.get(attribute, None)
+    return result
+
+
+def _check_regexp(pattern, value, message):
+    '''Record an error unless value is a string that pattern matches at its start.'''

+    if _check(value is not None, message + ': value is None'):
+        _check(re.match(pattern, value), message)
+
+
+def _check(condition, message):
+    '''Append message to MESSAGES when condition is falsy; always hand the
+    condition back so calls can be chained or used in an if.'''
+
+    if condition:
+        return condition
+    MESSAGES.append(message)
+    return condition

 def _require(condition, message):
    '''Fail if condition not met.'''