Add in-place substitution option for linkchecker.py (#41983)
* Add in-place replacement option for linkchecker.py Add a new flag '-w' to enable an experimental in-place replacement for Markdown links only. * Apply suggestions from code review Use formatted string literals instead of simple concatenation. Co-authored-by: Matt Boersma <Matt.Boersma@microsoft.com> * Remove other paths that should not be changed. * Add more logic to remove paths that start with http or paths that are already linking to the localized page (i.e. start with '/<language-code>'). * Apply suggestions from code review Simplify expressions. Co-authored-by: Matt Boersma <Matt.Boersma@microsoft.com> * Avoid updating pages in English. * Fix syntax error in set comprehension * Expand on documentation for new -w flag * Update documentation for linkchecker.py in README * Add a blurb with information about the new -w switch that describes what it does and what is the purpose of adding this behaviour change. * Update the previously existing description to match the currently available script flags. --------- Co-authored-by: Matt Boersma <Matt.Boersma@microsoft.com>
This commit is contained in:
parent
1c28e2d882
commit
a3b7cfd886
|
@ -7,12 +7,12 @@
|
|||
| `test_examples.sh` | This script tests whether a change affects example files bundled in the website. |
|
||||
| `check-headers-file.sh` | This script checks the headers if you are in a production environment. |
|
||||
| `diff_l10n_branches.py` | This script generates a report of outdated contents in `content/<l10n-lang>` directory by comparing two l10n team milestone branches. |
|
||||
| `hash-files.sh` | This script emits a hash for the files listed in $@ |
|
||||
| `linkchecker.py` | This is a link checker for the Kubernetes documentation website. |
|
||||
| `lsync.sh` | This script checks if the English version of a page has changed since a localized page has been committed. |
|
||||
| `replace-capture.sh` | This script sets K8S_WEBSITE in your env to your docs website root or rely on this script to determine it automatically |
|
||||
| `check-ctrlcode.py` | This script finds control codes (0x00-0x1f) in text files. |
|
||||
| `ja/verify-spelling.sh` | This script finds Japanese words that are against the guideline. |
|
||||
| `hash-files.sh` | This script emits a hash for the files listed in $@ |
|
||||
| `linkchecker.py` | This is a link checker for the Kubernetes documentation website. |
|
||||
| `lsync.sh` | This script checks if the English version of a page has changed since a localized page has been committed. |
|
||||
| `replace-capture.sh` | This script sets K8S_WEBSITE in your env to your docs website root or rely on this script to determine it automatically |
|
||||
| `check-ctrlcode.py` | This script finds control codes (0x00-0x1f) in text files. |
|
||||
| `ja/verify-spelling.sh` | This script finds Japanese words that are against the guideline. |
|
||||
|
||||
|
||||
|
||||
|
@ -104,14 +104,17 @@ This script emits as hash for the files listed in $@.
|
|||
## linkchecker.py
|
||||
|
||||
This is a link checker for the Kubernetes documentation website.
|
||||
- We cover the following cases for the language you provide via `-l`, which
|
||||
defaults to 'en'.
|
||||
- If the language specified is not English (`en`), we check if you are
|
||||
actually using the localized links. For example, if you specify `zh` as
|
||||
the language, and for link target `/docs/foo/bar`, we check if the English
|
||||
version exists AND if the Chinese version exists as well. A checking record
|
||||
is produced if the link can use the localized version.
|
||||
|
||||
- If the language for the files scanned is not English (`en`), we check if you
|
||||
are actually using the localized links. For example, if you specify a filter
|
||||
similar to `content/zh-cn/docs/**/*.md`, we check if the English version
|
||||
exists AND if the Chinese version exists as well. A checking record is
|
||||
produced if the link can use the localized version.
|
||||
- If the language specified is not English (`en`), a checking record is produced,
|
||||
and the `-w` switch is used, the script will perform in-place substitutions
|
||||
for links that have the form `/docs/...` and currently have a localized version
|
||||
available. This is an experimental feature and aims to reduce the amount of
|
||||
work required to update links to point to localized content. It currently
|
||||
works for Markdown files only.
|
||||
```
|
||||
|
||||
Usage: linkchecker.py -h
|
||||
|
|
|
@ -328,6 +328,7 @@ def check_target(page, anchor, target):
|
|||
return None
|
||||
msg = ("Localized page detected, please append '/%s' to the target"
|
||||
% LANG)
|
||||
|
||||
return new_record("ERROR", msg, target)
|
||||
|
||||
# target might be a redirect entry
|
||||
|
@ -390,7 +391,7 @@ def check_apiref_target(target, anchor):
|
|||
target+"#"+anchor)
|
||||
|
||||
|
||||
def validate_links(page):
|
||||
def validate_links(page, in_place_edit):
|
||||
"""Find and validate links on a content page.
|
||||
|
||||
The checking records are consolidated into the global variable RESULT.
|
||||
|
@ -410,10 +411,34 @@ def validate_links(page):
|
|||
|
||||
matches = regex.findall(content)
|
||||
records = []
|
||||
target_records = []
|
||||
for m in matches:
|
||||
r = check_target(page, m[0], m[1])
|
||||
if r:
|
||||
records.append(r)
|
||||
target_records.append(m[1])
|
||||
|
||||
# if multiple records are the same they need not be checked repeatedly
|
||||
# remove paths that are not relative too
|
||||
target_records = {item for item in target_records
|
||||
if not item.startswith("http") and
|
||||
not item.startswith(f"/{LANG}")}
|
||||
|
||||
# English-language pages don't have "en" in their path
|
||||
if in_place_edit and target_records and LANG != "en":
|
||||
updated_data = []
|
||||
for line in data:
|
||||
if any(rec in line for rec in target_records):
|
||||
for rec in target_records:
|
||||
line = line.replace(
|
||||
f"({rec})",
|
||||
# assumes unlocalized links are in "/docs/..." format
|
||||
f"(/{LANG}{rec})")
|
||||
updated_data.append(line)
|
||||
|
||||
with open(page, "w") as f:
|
||||
for line in updated_data:
|
||||
f.write(line)
|
||||
|
||||
# searches for pattern: {{< api-reference page="" anchor=""
|
||||
apiref_re = r"{{ *< *api-reference page=\"([^\"]*?)\" *anchor=\"(.*?)\""
|
||||
|
@ -455,6 +480,9 @@ def parse_arguments():
|
|||
metavar="<FILTER>",
|
||||
help=("File pattern to scan. "
|
||||
"(default='content/en/docs/**/*.md')"))
|
||||
PARSER.add_argument("-w", dest="in_place_edit", action="store_true",
|
||||
help="[EXPERIMENTAL] Turns on in-place replacement "
|
||||
"for localized content.")
|
||||
|
||||
return PARSER.parse_args()
|
||||
|
||||
|
@ -500,7 +528,7 @@ def main():
|
|||
|
||||
folders = [f for f in glob.glob(ARGS.filter, recursive=True)]
|
||||
for page in folders:
|
||||
validate_links(page)
|
||||
validate_links(page, ARGS.in_place_edit)
|
||||
|
||||
dump_result()
|
||||
|
||||
|
|
Loading…
Reference in New Issue