Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#! /usr/bin/env python
import json
import logging
import re
import sys
# Hack to make codebase compatible with python 2 and 3: Python 3 has no
# ``basestring`` builtin, so alias the name to ``str`` there. On Python 2 the
# builtin is left untouched.
if sys.version_info[0] >= 3:
    basestring = str
# Common validation functions
def is_list(text):
    """Check whether a string holds the JSON representation of a list.

    Surrounding whitespace is ignored. Returns False (after logging a debug
    message) when the string is not valid JSON, and False when it parses to
    anything other than a list.
    """
    candidate = text.strip()
    try:
        parsed = json.loads(candidate)
    except ValueError:
        # Not parseable at all, so it cannot be a list
        logging.debug(
            "Could not convert string to python object: {0}".format(candidate))
        return False
    return isinstance(parsed, list)
def is_str(text):
    """Check that the input is a python string containing at least one
    character (whitespace-only strings still count)"""
    if not isinstance(text, basestring):
        return False
    return len(text) > 0
def is_numeric(text):
    """Validate whether the string represents a number (including unicode)

    Anything that ``float()`` accepts counts as numeric. Returns False, rather
    than raising, both for strings that are not numbers and for values that
    ``float()`` cannot handle at all (such as None or a list).
    """
    try:
        float(text)
    except (TypeError, ValueError):
        # ValueError: a string that is not a number.
        # TypeError: not a string or number in the first place.
        return False
    return True
#### Text cleanup functions, pre-validation
def strip_attrs(s):
    """Remove every ``{.name}`` attribute, together with the single
    whitespace character in front of it, from a markdown title string"""
    attr_pattern = re.compile(r"\s\{\..*?\}")
    return attr_pattern.sub("", s)
def get_css_class(s):
    """Return any and all CSS classes (when a line is suffixed by {.classname})

    Returns a list with the name inside each ``{.name}`` marker, in order of
    appearance. Returns empty list when the string has no such marker."""
    # Raw string: the pattern is unchanged, but "\{" and "\." are not valid
    # escapes in a normal string literal and trigger warnings on newer Pythons.
    return re.findall(r"\{\.(.*?)\}", s)
### Helper objects
class CommonMarkHelper(object):
    """Basic helper functions for working with the internal abstract syntax
    tree produced by CommonMark parser"""

    def __init__(self, ast):
        """Wrap a parsed document.

        :param ast: Root node of the tree; must expose a `children` attribute.
        """
        self.data = ast
        self.children = self.data.children

    def _get_doc_header_field(self, field):
        """Look up one `label: value` entry in the document header block.

        The header block is taken to be the second child of the root node
        (IndexError is raised if the root has fewer than two children).
        Lines without a colon are skipped. The label comparison ignores case
        and surrounding whitespace.

        :param field: Lower-case label to look for (e.g. "title")
        :returns: The stripped value, or an empty string if not found
        """
        doc_headers = self.data.children[1]  # Throw index error if none found

        for line in doc_headers.strings:
            # partition() never raises, unlike unpacking split(":", 1), so a
            # header line without a colon is skipped instead of crashing.
            label, sep, contents = line.partition(":")
            if sep and label.strip().lower() == field:
                return contents.strip()

        # If field not found, return an empty string for display purposes
        return ''

    def get_doc_header_title(self):
        """Helper method for SWC templates: get the document title from
        the YAML headers"""
        return self._get_doc_header_field("title")

    def get_doc_header_subtitle(self):
        """Helper method for SWC templates: get the document subtitle from
        the YAML headers"""
        return self._get_doc_header_field("subtitle")

    def get_block_titled(self, title, heading_level=2, ast_node=None):
        """Examine children. Return all children of the given node that:
        a) are blockquoted elements, and
        b) contain a heading with the specified text, at the specified level.
        For example, this can be used to find the "Prerequisites" section
        in index.md

        Searches the document root when `ast_node` is None.
        Returns empty list if no appropriate node is found"""
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children
                if self.is_block(n) and
                self.has_section_heading(
                    title,
                    ast_node=n,
                    heading_level=heading_level,
                    show_msg=False)]

    def get_section_headings(self, ast_node=None):
        """Returns a list of ast nodes that are headings

        Only direct children of `ast_node` (document root when None) are
        examined."""
        if ast_node is None:
            ast_node = self.data
        return [n for n in ast_node.children if self.is_heading(n)]

    def get_link_info(self, link_node):
        """Given a link node, return the link title and destination

        :returns: Tuple of (destination, link text). The link text is None
            when the node has no usable label.
        :raises TypeError: If the node is neither a link nor an image
        """
        if not self.is_external(link_node):
            raise TypeError("Cannot apply this method to something that is not a link")

        dest = link_node.destination
        try:
            link_text = link_node.label[0].c
        except (AttributeError, IndexError, TypeError):
            # Missing, empty or None label: report the link without text
            link_text = None

        return dest, link_text

    def find_external_links(self, ast_node=None, parent_crit=None):
        """Recursive function that locates all references to external content
        under specified node. (links or images)

        :param ast_node: Node to search; the document root when None
        :param parent_crit: Optional function taking the node whose inline
            content is being examined; inline links are kept only when it
            returns a true value. A node that is itself a link or image is
            always kept.
        :returns: List of link/image nodes
        """
        if ast_node is None:
            ast_node = self.data

        if parent_crit is None:
            # User can optionally provide a function to filter link list
            # based on where link appears. (eg, only links in headings)
            # If no filter is provided, accept all links in that node.
            def parent_crit(_node):
                return True

        # Link can be node itself, or hiding in inline content
        links = [n for n in ast_node.inline_content
                 if self.is_external(n) and parent_crit(ast_node)]

        if self.is_external(ast_node):
            links.append(ast_node)

        # Also look for links in sub-nodes
        for child in ast_node.children:
            links.extend(self.find_external_links(child,
                                                  parent_crit=parent_crit))

        return links

    def has_section_heading(self, section_title, ast_node=None,
                            heading_level=2, limit=sys.maxsize, show_msg=True):
        """Does the file contain (<= x copies of) specified heading text?
        Will strip off any CSS attributes when looking for the section title

        :param section_title: Heading text to look for (exact match)
        :param ast_node: Node whose direct children are searched; the
            document root when None
        :param heading_level: Required heading level
        :param limit: Maximum number of matching headings allowed
        :param show_msg: Log the outcome; pass False when used as a helper
        :returns: True if between 1 and `limit` matching headings were found
        """
        if ast_node is None:
            ast_node = self.data

        # Headings with no text at all cannot match and are skipped
        num_nodes = sum(
            1 for n in self.get_section_headings(ast_node)
            if n.level == heading_level
            and n.strings
            and strip_attrs(n.strings[0]) == section_title)

        # Suppress error msg if used as a helper method
        if show_msg and num_nodes == 0:
            logging.error("Document does not contain the specified "
                          "heading: {0}".format(section_title))
        elif show_msg and num_nodes > limit:
            logging.error("Document must not contain more than {0} copies of"
                          " the heading {1}".format(limit, section_title or 0))
        elif show_msg:
            logging.info("Verified that document contains the specified"
                         " heading: {0}".format(section_title))
        return (0 < num_nodes <= limit)

    def has_number_children(self, ast_node,
                            exact=None, minc=0, maxc=sys.maxsize):
        """Does the specified node (such as a bulleted list) have the expected
        number of children?

        :param exact: If given (including 0), the node must have exactly this
            many children and `minc`/`maxc` are ignored
        :param minc: Minimum number of children (inclusive)
        :param maxc: Maximum number of children (inclusive)
        """
        # Compare against None so that exact=0 ("must be empty") is honored
        if exact is not None:
            minc = maxc = exact

        return (minc <= len(ast_node.children) <= maxc)

    # Helpers, in case the evolving CommonMark spec changes the names of nodes
    def is_hr(self, ast_node):
        """Is the node a horizontal rule (hr)?"""
        return ast_node.t == 'HorizontalRule'

    def is_heading(self, ast_node):
        """Is the node a heading/ title?"""
        return ast_node.t == "ATXHeader"

    def is_paragraph(self, ast_node):
        """Is the node a paragraph?"""
        return ast_node.t == "Paragraph"

    def is_list(self, ast_node):
        """Is the node a list? (ordered or unordered)"""
        return ast_node.t == "List"

    def is_link(self, ast_node):
        """Is the node a link?"""
        return ast_node.t == "Link"

    def is_external(self, ast_node):
        """Does the node reference content outside the file? (image or link)"""
        return ast_node.t in ("Link", "Image")

    def is_block(self, ast_node):
        """Is the node a BlockQuoted element?"""
        return ast_node.t == "BlockQuote"