Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
Lesson Template
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
prace-lessons
Lesson Template
Commits
02430798
Commit
02430798
authored
10 years ago
by
abought
Browse files
Options
Downloads
Plain Diff
Merge pull request #113 from abought/link_check_text
Link text validation changes
parents
40c84e19
7c92facb
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
.gitignore
+0
-1
0 additions, 1 deletion
.gitignore
tools/check.py
+79
-34
79 additions, 34 deletions
tools/check.py
tools/test_check.py
+46
-0
46 additions, 0 deletions
tools/test_check.py
tools/validation_helpers.py
+9
-3
9 additions, 3 deletions
tools/validation_helpers.py
with
134 additions
and
38 deletions
.gitignore
+
0
−
1
View file @
02430798
*~
*.pyc
.DS_Store
_site
This diff is collapsed.
Click to expand it.
tools/check.py
+
79
−
34
View file @
02430798
...
...
@@ -10,7 +10,6 @@ Contains validators for several kinds of template.
Call at command line with flag -h to see options and usage instructions.
"""
from
__future__
import
print_function
import
argparse
import
glob
...
...
@@ -203,33 +202,31 @@ class MarkdownValidator(object):
return
(
len
(
missing_headings
)
==
0
)
and
\
valid_order
and
no_extra
and
correct_level
def
_validate_one_link
(
self
,
link_node
):
"""
Logic to validate a single external asset (image or link)
# Link validation methods
def
_validate_one_html_link
(
self
,
link_node
,
check_text
=
False
):
"""
Any local html file being linked was generated as part of the lesson.
Therefore, file links (.html) must have a Markdown file
in the expected folder.
The title of the linked Markdown document should match the link text.
For other assets (links or images), just verify that a file exists
"""
dest
,
link_text
=
self
.
ast
.
get_link_info
(
link_node
)
if
re
.
match
(
r
"
^[\w,\s-]+\.(html?)
"
,
dest
,
re
.
IGNORECASE
):
# HTML files in same folder are made from Markdown; special tests
fn
=
dest
.
split
(
"
#
"
)[
0
]
# Split anchor name from filename
expected_md_fn
=
os
.
path
.
splitext
(
fn
)[
0
]
+
os
.
extsep
+
"
md
"
expected_md_path
=
os
.
path
.
join
(
self
.
markdown_dir
,
expected_md_fn
)
if
not
os
.
path
.
isfile
(
expected_md_path
):
logging
.
error
(
"
In {0}:
"
"
The document links to {1}, but could not find
"
"
the expected markdown file {2}
"
.
format
(
self
.
filename
,
dest
,
expected_md_path
))
return
False
# HTML files in same folder are made from Markdown; special tests
fn
=
dest
.
split
(
"
#
"
)[
0
]
# Split anchor name from filename
expected_md_fn
=
os
.
path
.
splitext
(
fn
)[
0
]
+
os
.
extsep
+
"
md
"
expected_md_path
=
os
.
path
.
join
(
self
.
markdown_dir
,
expected_md_fn
)
if
not
os
.
path
.
isfile
(
expected_md_path
):
logging
.
error
(
"
In {0}:
"
"
The document links to {1}, but could not find
"
"
the expected markdown file {2}
"
.
format
(
self
.
filename
,
fn
,
expected_md_path
))
return
False
if
check_text
is
True
:
# If file exists, parse and validate link text = node title
with
open
(
expected_md_path
,
'
rU
'
)
as
link_dest_file
:
dest_contents
=
link_dest_file
.
read
()
...
...
@@ -247,37 +244,68 @@ class MarkdownValidator(object):
self
.
filename
,
dest
,
link_text
,
dest_page_title
))
return
False
elif
not
re
.
match
(
r
"
^((https?|ftp)://)
"
,
dest
,
re
.
IGNORECASE
)
\
return
True
def
_validate_one_link
(
self
,
link_node
,
check_text
=
False
):
"""
Logic to validate a single link to a file asset
Performs special checks for links to a local markdown file.
For links or images, just verify that a file exists.
"""
dest
,
link_text
=
self
.
ast
.
get_link_info
(
link_node
)
if
re
.
match
(
r
"
^[\w,\s-]+\.(html?)
"
,
dest
,
re
.
IGNORECASE
):
# Validate local html links have matching md file
return
self
.
_validate_one_html_link
(
link_node
,
check_text
=
check_text
)
elif
not
re
.
match
(
r
"
^((https?|ftp)://.+)
"
,
dest
,
re
.
IGNORECASE
)
\
and
not
re
.
match
(
r
"
^#.*
"
,
dest
):
# If not web URL, and not anchor on same page, then
# verify that local file exists
dest_path
=
os
.
path
.
join
(
self
.
lesson_dir
,
dest
)
dest_path
=
dest_path
.
split
(
"
#
"
)[
0
]
# Split anchor from filename
if
not
os
.
path
.
isfile
(
dest_path
):
fn
=
dest
.
split
(
"
#
"
)[
0
]
# Split anchor name from filename
logging
.
error
(
"
In {0}:
"
"
Could not find the linked asset file
"
"
{1} in {2}. If this is a URL, it must be
"
"
prefixed with http(s):// or ftp://.
"
.
format
(
self
.
filename
,
dest
,
dest_path
))
self
.
filename
,
fn
,
dest_path
))
return
False
else
:
logging
.
warnin
g
(
logging
.
debu
g
(
"
In {0}:
"
"
Skipped validation of link {1}
"
.
format
(
self
.
filename
,
dest
))
return
True
def
_validate_links
(
self
,
links_to_skip
=
()):
def
_partition_links
(
self
):
"""
Fetch links in document. If this template has special requirements
for link text (eg only some links
'
text should match dest page title),
filter the list accordingly.
Default behavior: don
'
t check the text of any links
"""
check_text
=
[]
no_check_text
=
self
.
ast
.
find_external_links
()
return
check_text
,
no_check_text
def
_validate_links
(
self
):
"""
Validate all references to external content
This includes links AND images: these are the two types of node that
CommonMark assigns a .destination property
"""
links
=
self
.
ast
.
find_external
_links
()
check_text
,
no_check_text
=
self
.
_partition
_links
()
valid
=
True
for
link_node
in
links
:
if
link_node
.
destination
not
in
links_to_skip
:
res
=
self
.
_validate_one_link
(
link_node
)
valid
=
valid
and
res
for
link_node
in
check_text
:
res
=
self
.
_validate_one_link
(
link_node
,
check_text
=
True
)
valid
=
valid
and
res
for
link_node
in
no_check_text
:
res
=
self
.
_validate_one_link
(
link_node
,
check_text
=
False
)
valid
=
valid
and
res
return
valid
def
_run_tests
(
self
):
...
...
@@ -309,6 +337,11 @@ class IndexPageValidator(MarkdownValidator):
DOC_HEADERS
=
{
'
layout
'
:
vh
.
is_str
,
'
title
'
:
vh
.
is_str
}
def
_partition_links
(
self
):
"""
Check the text of every link in index.md
"""
check_text
=
self
.
ast
.
find_external_links
()
return
check_text
,
[]
def
_validate_intro_section
(
self
):
"""
Validate the intro section
...
...
@@ -339,12 +372,6 @@ class IndexPageValidator(MarkdownValidator):
self
.
filename
))
return
intro_section
and
prereqs_tests
def
_validate_links
(
self
,
links_to_skip
=
(
'
motivation.html
'
,
'
reference.html
'
,
'
discussion.html
'
,
'
instructors.html
'
)):
return
super
(
IndexPageValidator
,
self
).
_validate_links
(
links_to_skip
)
def
_run_tests
(
self
):
parent_tests
=
super
(
IndexPageValidator
,
self
).
_run_tests
()
tests
=
[
self
.
_validate_intro_section
()]
...
...
@@ -418,6 +445,15 @@ class ReferencePageValidator(MarkdownValidator):
"
title
"
:
vh
.
is_str
,
"
subtitle
"
:
vh
.
is_str
}
def
_partition_links
(
self
):
"""
For reference.md, only check that text of link matches
dest page subtitle if the link is in a heading
"""
all_links
=
self
.
ast
.
find_external_links
()
check_text
=
self
.
ast
.
find_external_links
(
parent_crit
=
self
.
ast
.
is_heading
)
dont_check_text
=
[
n
for
n
in
all_links
if
n
not
in
check_text
]
return
check_text
,
dont_check_text
def
_validate_glossary_entry
(
self
,
glossary_entry
):
"""
Validate glossary entry
...
...
@@ -491,6 +527,15 @@ class InstructorPageValidator(MarkdownValidator):
"
title
"
:
vh
.
is_str
,
"
subtitle
"
:
vh
.
is_str
}
def
_partition_links
(
self
):
"""
For instructors.md, only check that text of link matches
dest page subtitle if the link is in a heading
"""
all_links
=
self
.
ast
.
find_external_links
()
check_text
=
self
.
ast
.
find_external_links
(
parent_crit
=
self
.
ast
.
is_heading
)
dont_check_text
=
[
n
for
n
in
all_links
if
n
not
in
check_text
]
return
check_text
,
dont_check_text
class
LicensePageValidator
(
MarkdownValidator
):
"""
Validate LICENSE.md: user should not edit this file
"""
...
...
This diff is collapsed.
Click to expand it.
tools/test_check.py
+
46
−
0
View file @
02430798
...
...
@@ -202,6 +202,18 @@ Paragraph of introductory material.
self
.
assertFalse
(
validator
.
_validate_intro_section
())
# TESTS INVOLVING LINKS TO OTHER CONTENT
def
test_should_check_text_of_all_links_in_index
(
self
):
"""
Text of every local-html link in index.md should
match dest page title
"""
validator
=
self
.
_create_validator
(
"""
## [This link is in a heading](reference.html)
[Topic Title One](01-one.html#anchor)
"""
)
links
=
validator
.
ast
.
find_external_links
()
check_text
,
dont_check_text
=
validator
.
_partition_links
()
self
.
assertEqual
(
len
(
dont_check_text
),
0
)
self
.
assertEqual
(
len
(
check_text
),
2
)
def
test_file_links_validate
(
self
):
"""
Verify that all links in a sample file validate.
Involves checking for example files; may fail on
"
core
"
branch
"""
...
...
@@ -303,6 +315,18 @@ minutes: not a number
Some text
"""
)
self
.
assertFalse
(
validator
.
_validate_has_no_headings
())
def
test_should_not_check_text_of_links_in_topic
(
self
):
"""
Never check that text of local-html links in topic
matches dest title
"""
validator
=
self
.
_create_validator
(
"""
## [This link is in a heading](reference.html)
[Topic Title One](01-one.html#anchor)
"""
)
links
=
validator
.
ast
.
find_external_links
()
check_text
,
dont_check_text
=
validator
.
_partition_links
()
self
.
assertEqual
(
len
(
dont_check_text
),
2
)
self
.
assertEqual
(
len
(
check_text
),
0
)
def
test_sample_file_passes_validation
(
self
):
sample_validator
=
self
.
VALIDATOR
(
self
.
SAMPLE_FILE
)
res
=
sample_validator
.
validate
()
...
...
@@ -385,6 +409,28 @@ class TestInstructorPage(BaseTemplateTest):
SAMPLE_FILE
=
os
.
path
.
join
(
MARKDOWN_DIR
,
"
instructors.md
"
)
VALIDATOR
=
check
.
InstructorPageValidator
def
test_should_selectively_check_text_of_links_in_topic
(
self
):
"""
Only verify that text of local-html links in topic
matches dest title if the link is in a heading
"""
validator
=
self
.
_create_validator
(
"""
## [Reference](reference.html)
[Topic Title One](01-one.html#anchor)
"""
)
check_text
,
dont_check_text
=
validator
.
_partition_links
()
self
.
assertEqual
(
len
(
dont_check_text
),
1
)
self
.
assertEqual
(
len
(
check_text
),
1
)
def
test_link_dest_bad_while_text_ignored
(
self
):
validator
=
self
.
_create_validator
(
"""
[ignored text](nonexistent.html)
"""
)
self
.
assertFalse
(
validator
.
_validate_links
())
def
test_link_dest_good_while_text_ignored
(
self
):
validator
=
self
.
_create_validator
(
"""
[ignored text](01-one.html)
"""
)
self
.
assertTrue
(
validator
.
_validate_links
())
def
test_sample_file_passes_validation
(
self
):
sample_validator
=
self
.
VALIDATOR
(
self
.
SAMPLE_FILE
)
res
=
sample_validator
.
validate
()
...
...
This diff is collapsed.
Click to expand it.
tools/validation_helpers.py
+
9
−
3
View file @
02430798
...
...
@@ -121,21 +121,27 @@ class CommonMarkHelper(object):
return
dest
,
link_text
def
find_external_links
(
self
,
ast_node
=
None
):
def
find_external_links
(
self
,
ast_node
=
None
,
parent_crit
=
None
):
"""
Recursive function that locates all references to external content
under specified node. (links or images)
"""
ast_node
=
ast_node
or
self
.
data
if
parent_crit
is
None
:
# User can optionally provide a function to filter link list
# based on where link appears. (eg, only links in headings)
# If no filter is provided, accept all links in that node.
parent_crit
=
lambda
n
:
True
# Link can be node itself, or hiding in inline content
links
=
[
n
for
n
in
ast_node
.
inline_content
if
self
.
is_external
(
n
)]
if
self
.
is_external
(
n
)
and
parent_crit
(
ast_node
)
]
if
self
.
is_external
(
ast_node
):
links
.
append
(
ast_node
)
# Also look for links in sub-nodes
for
n
in
ast_node
.
children
:
links
.
extend
(
self
.
find_external_links
(
n
))
links
.
extend
(
self
.
find_external_links
(
n
,
parent_crit
=
parent_crit
))
return
links
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment