Skip to content
Snippets Groups Projects
validation_helpers.py 7.3 KiB
Newer Older
#! /usr/bin/env python

import json
import logging
import re
import sys

try:  # Compatibility shim so the codebase runs on both Python 2 and 3
  basestring  # Name lookup only: succeeds where `basestring` is defined
except NameError:
  basestring = str  # Fall back to `str` when `basestring` is not defined


# Common validation functions
def is_list(text):
    """Validate whether the provided string can be converted to python list

    The text is stripped of surrounding whitespace and parsed as JSON.

    Returns True only when parsing succeeds and the result is a list.
    Returns False for unparseable text, for JSON that is not a list, and
    for input that is not string-like at all (e.g. None or a number)."""
    try:
        text = text.strip()
    except AttributeError:
        # Not string-like (None, int, ...): a validator should reject the
        # value rather than crash on it
        return False

    try:
        text_as_list = json.loads(text)
    except (TypeError, ValueError):
        # ValueError covers malformed JSON; TypeError covers input types
        # that json.loads refuses to parse
        logging.debug("Could not convert string to python object: {0}".format(text))
        return False

    return isinstance(text_as_list, list)


def is_str(text):
    """Check that the input is a string containing at least one character

    Whitespace-only strings pass; only non-string values and the empty
    string are rejected."""
    if not isinstance(text, basestring):
        return False
    return len(text) > 0


def is_numeric(text):
    """Validate whether the string represents a number (including unicode)

    The check is "can `float()` convert it": anything float() accepts is
    reported as numeric.

    Returns False, rather than raising, when the value cannot be converted,
    including values of a type float() rejects (e.g. None or a list)."""
    try:
        float(text)
    except (TypeError, ValueError):
        # ValueError: text that is not a number
        # TypeError: a value float() cannot handle at all (None, list, ...)
        return False
    return True


#### Text cleanup functions, pre-validation
def strip_attrs(s):
    """Remove attribute groups such as {.name} from a markdown title string

    Each occurrence of one whitespace character followed by a `{.…}` group
    is deleted. A group that is not preceded by whitespace is left alone."""
    attr_pattern = re.compile(r"\s\{\..*?\}")
    return attr_pattern.sub("", s)


def get_css_class(s):
    """Return any and all CSS classes (when a line is suffixed by {.classname})

    Each `{.…}` group found in the string contributes one entry: the text
    between `{.` and the closing `}`.

    Returns empty list when the string contains no such group."""
    # Raw string: `\{`, `\.` and `\}` are regex escapes, not valid Python
    # string escapes, and trigger warnings in a plain string literal
    return re.findall(r"\{\.(.*?)\}", s)


### Helper objects
class CommonMarkHelper(object):
    """Basic helper functions for working with the internal abstract syntax
    tree produced by CommonMark parser

    The helper wraps a root node. Nodes are expected to expose a type tag
    `t` and a `children` list; individual methods additionally read
    `inline_content`, `strings`, `level`, `destination` and `label`."""
    def __init__(self, ast):
        """Store the root node and a shortcut to its direct children"""
        self.data = ast
        self.children = self.data.children

    def _get_doc_header_field(self, field):
        """Look up one `label: contents` entry in the document header block

        The header block is taken to be the second child of the root node;
        an IndexError is raised if the document has fewer than two children.
        Labels are compared case-insensitively against `field`.

        Returns the stripped contents of the first matching line, or an
        empty string (for display purposes) when no line matches."""
        doc_headers = self.data.children[1]  # Throw index error if none found

        for s in doc_headers.strings:
            label, sep, contents = s.partition(":")
            if not sep:
                # Blank or colon-less line: not a `label: contents` pair
                continue
            if label.lower() == field:
                return contents.strip()

        return ''

    def get_doc_header_title(self):
        """Helper method for SWC templates: get the document title from
        the YAML headers

        Returns an empty string if no title is found"""
        return self._get_doc_header_field("title")

    def get_doc_header_subtitle(self):
        """Helper method for SWC templates: get the document subtitle from
        the YAML headers

        Returns an empty string if no subtitle is found"""
        return self._get_doc_header_field("subtitle")

    def get_block_titled(self, title, heading_level=2, ast_node=None):
        """Examine children. Return all children of the given node that:
        a) are blockquoted elements, and
        b) contain a heading with the specified text, at the specified level.
        For example, this can be used to find the "Prerequisites" section
            in index.md

        Searches the root node when `ast_node` is not given.

        Returns empty list if no appropriate node is found"""
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children
                if self.is_block(n) and
                self.has_section_heading(
                    title,
                    ast_node=n,
                    heading_level=heading_level,
                    show_msg=False)]

    def get_section_headings(self, ast_node=None):
        """Returns a list of the direct children of the node (default: the
        root node) that are headings"""
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children if self.is_heading(n)]

    def get_link_info(self, link_node):
        """Given a link (or image) node, return a tuple of
        (destination, link text)

        The link text is None when it cannot be read from the node's label.

        Raises TypeError if the node is neither a link nor an image"""
        if not self.is_external(link_node):
            raise TypeError("Cannot apply this method to something that is not a link")

        dest = link_node.destination
        try:
            link_text = link_node.label[0].c
        except (AttributeError, IndexError, KeyError, TypeError):
            # Missing or empty label: best effort, report no text
            link_text = None

        return dest, link_text

    def find_external_links(self, ast_node=None, parent_crit=None):
        """Recursive function that locates all references to external content
         under specified node. (links or images)

        `parent_crit` is an optional function taking the containing node and
        returning whether links found in that node's inline content should
        be kept. A node that is itself a link or image is always included.

        Searches from the root node when `ast_node` is not given"""
        if ast_node is None:
            ast_node = self.data
        if parent_crit is None:
            # User can optionally provide a function to filter link list
            # based on where link appears. (eg, only links in headings)
            # If no filter is provided, accept all links in that node.
            parent_crit = lambda n: True

        # Link can be node itself, or hiding in inline content
        links = [n for n in ast_node.inline_content
                 if self.is_external(n) and parent_crit(ast_node)]

        if self.is_external(ast_node):
            links.append(ast_node)

        # Also look for links in sub-nodes
        for n in ast_node.children:
            links.extend(self.find_external_links(n,
                                                  parent_crit=parent_crit))

        return links

    def has_section_heading(self, section_title, ast_node=None,
                            heading_level=2, limit=sys.maxsize, show_msg=True):
        """Does the file contain (<= x copies of) specified heading text?
        Will strip off any CSS attributes when looking for the section title

        Only headings that are direct children of the node (default: the
        root node) and have the given level are counted. The outcome is
        logged unless `show_msg` is False.

        Returns True when between 1 and `limit` matching headings exist"""
        if ast_node is None:
            ast_node = self.data

        num_nodes = sum(1 for n in self.get_section_headings(ast_node)
                        if (strip_attrs(n.strings[0]) == section_title)
                        and (n.level == heading_level))

        # Suppress error msg if used as a helper method
        if show_msg:
            if num_nodes == 0:
                logging.error("Document does not contain the specified "
                              "heading: {0}".format(section_title))
            elif num_nodes > limit:
                # NOTE(review): `or 0` prints 0 for an empty title; kept
                # as-is to preserve the existing log output - confirm intent
                logging.error("Document must not contain more than {0} copies of"
                              " the heading {1}".format(limit, section_title or 0))
            else:
                logging.info("Verified that document contains the specified"
                             " heading: {0}".format(section_title))
        return (0 < num_nodes <= limit)

    def has_number_children(self, ast_node,
                            exact=None, minc=0, maxc=sys.maxsize):
        """Does the specified node (such as a bulleted list) have the expected
         number of children?

        When `exact` is given (including 0) it overrides `minc` and `maxc`;
        otherwise the child count must lie within [minc, maxc]"""

        # Compare against None so that exact=0 ("must have no children")
        # is honoured instead of being treated as "not specified"
        if exact is not None:
            minc = maxc = exact

        return (minc <= len(ast_node.children) <= maxc)

    # Helpers, in case the evolving CommonMark spec changes the names of nodes
    def is_hr(self, ast_node):
        """Is the node a horizontal rule (hr)?"""
        return ast_node.t == 'HorizontalRule'

    def is_heading(self, ast_node):
        """Is the node a heading/ title?"""
        return ast_node.t == "ATXHeader"

    def is_paragraph(self, ast_node):
        """Is the node a paragraph?"""
        return ast_node.t == "Paragraph"

    def is_list(self, ast_node):
        """Is the node a list? (ordered or unordered)"""
        return ast_node.t == "List"

    def is_link(self, ast_node):
        """Is the node a link?"""
        return ast_node.t == "Link"

    def is_external(self, ast_node):
        """Does the node reference content outside the file? (image or link)"""
        return ast_node.t in ("Link", "Image")

    def is_block(self, ast_node):
        """Is the node a BlockQuoted element?"""
        return ast_node.t == "BlockQuote"