esp-idf/tools/ci/check_readme_links.py
Marius Vikhammer 4bf7b16358 check_readme_links: remove throwing of exception before exit
Reraising the exception before exiting was intended to help troubleshoot,
but turned out to be more confusing than helpful as it might look like the script was failing
2021-04-26 15:36:30 +08:00

163 lines
4.9 KiB
Python
Executable File

#!/usr/bin/env python
#
# Checks that all links in the readme markdown files are valid
#
# Copyright 2020 Espressif Systems (Shanghai) PTE LTD
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import argparse
import concurrent.futures
import os
import os.path
import re
import sys
import urllib.error
import urllib.request
from collections import defaultdict, namedtuple
from pathlib import Path
EXCLUDE_DOCS_LIST = ['examples/peripherals/secure_element/atecc608_ecdsa/components/esp-cryptoauthlib/cryptoauthlib/**']
# The apple apps links are not accessible from the company network for some reason
EXCLUDE_URL_LIST = ['https://apps.apple.com/in/app/esp-ble-provisioning/id1473590141', 'https://apps.apple.com/in/app/esp-softap-provisioning/id1474040630']
Link = namedtuple('Link', ['file', 'url'])
class ReadmeLinkError(Exception):
def __init__(self, file, url):
self.file = file
self.url = url
class RelativeLinkError(ReadmeLinkError):
def __str__(self):
return 'Relative link error, file - {} not found, linked from {}'.format(self.url, self.file)
class UrlLinkError(ReadmeLinkError):
def __init__(self, file, url, error_code):
self.error_code = error_code
super().__init__(file, url)
def __str__(self):
files = [str(f) for f in self.file]
return 'URL error, url - {} in files - {} is not accessible, request returned {}'.format(self.url, ', '.join(files), self.error_code)
# we do not want a failed test just due to bad network conditions, for non 404 errors we simply print a warning
def check_url(url, files, timeout):
try:
with urllib.request.urlopen(url, timeout=timeout):
return
except urllib.error.HTTPError as e:
if e.code == 404:
raise UrlLinkError(files, url, str(e))
else:
print('Unable to access {}, err = {}'.format(url, str(e)))
except Exception as e:
print('Unable to access {}, err = {}'.format(url, str(e)))
def check_web_links(web_links):
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
errors = []
future_to_url = {executor.submit(check_url, url, files, timeout=30): (url, files) for url, files in web_links.items()}
for future in concurrent.futures.as_completed(future_to_url):
try:
future.result()
except UrlLinkError as e:
errors.append(e)
return errors
def check_file_links(file_links):
errors = []
for link in file_links:
link_path = link.file.parent / link.url
if not Path.exists(link_path):
errors.append(RelativeLinkError(link.file, link.url))
print('Found {} errors with relative links'.format(len(errors)))
return errors
def get_md_links(folder):
MD_LINK_RE = r'\[.+?\]\((.+?)(#.+)?\)'
idf_path = Path(os.getenv('IDF_PATH'))
links = []
for path in (idf_path / folder).rglob('*.md'):
if any([path.relative_to(idf_path).match(exclude_doc) for exclude_doc in EXCLUDE_DOCS_LIST]):
print('{} - excluded'.format(path))
continue
with path.open(encoding='utf8') as f:
content = f.read()
for url in re.findall(MD_LINK_RE, content):
link = Link(path, url[0].lstrip())
# Ignore "local" links
if not link.url.startswith('#'):
links.append(link)
return links
def check_readme_links(args):
links = get_md_links('examples')
print('Found {} links'.format(len(links)))
errors = []
web_links = defaultdict(list)
file_links = []
# Sort links into file and web links
for link in links:
if link.url.startswith('http'):
web_links[link.url].append(link.file)
else:
file_links.append(link)
for url in EXCLUDE_URL_LIST:
del web_links[url]
errors.extend(check_file_links(file_links))
if not args.skip_weburl:
errors.extend(check_web_links(web_links))
print('Found {} errors:'.format(len(errors)))
for e in errors:
print(e)
return 1 if len(errors) > 0 else 0
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='check_readme_links.py: Checks for dead links in example READMEs', prog='check_readme_links.py')
parser.add_argument('--skip-weburl', '-w', action='store_true', help='Skip checking of web URLs, only check links to local files')
args = parser.parse_args()
sys.exit(check_readme_links(args))