2020-07-24 12:51:34 -04:00
|
|
|
#!/usr/bin/env python
|
|
|
|
#
|
|
|
|
# Utility script for ESP-IDF developers to work with the CODEOWNERS file.
|
|
|
|
#
|
2021-09-24 00:06:10 -04:00
|
|
|
# SPDX-FileCopyrightText: 2020-2021 Espressif Systems (Shanghai) CO LTD
|
|
|
|
# SPDX-License-Identifier: Apache-2.0
|
2020-07-24 12:51:34 -04:00
|
|
|
|
|
|
|
import argparse
|
|
|
|
import os
|
2020-09-12 07:30:09 -04:00
|
|
|
import re
|
2020-07-24 12:51:34 -04:00
|
|
|
import subprocess
|
|
|
|
import sys
|
|
|
|
|
2020-10-21 07:30:49 -04:00
|
|
|
from idf_ci_utils import IDF_PATH
|
2020-09-12 07:30:09 -04:00
|
|
|
|
2021-01-25 21:49:01 -05:00
|
|
|
# Path of the CODEOWNERS file: <IDF_PATH>/.gitlab/CODEOWNERS (IDF_PATH comes from idf_ci_utils).
CODEOWNERS_PATH = os.path.join(IDF_PATH, '.gitlab', 'CODEOWNERS')
|
|
|
|
# Prefix every owner entry is expected to start with; action_ci_check reports owners that don't.
CODEOWNER_GROUP_PREFIX = '@esp-idf-codeowners/'
|
2020-07-24 12:51:34 -04:00
|
|
|
|
|
|
|
|
2020-09-12 07:30:09 -04:00
|
|
|
def get_all_files():
    """
    Get list of all file paths in the repository.

    Runs ``git ls-files -z`` with IDF_PATH as the working directory and returns the
    reported paths as a list of strings. Returns an empty list if git reports no files.

    Raises subprocess.CalledProcessError if the git command exits with a non-zero status.
    """
    # Ask for NUL-terminated output: file names may contain spaces (or even newlines), and
    # without '-z' git quotes and escapes paths that contain unusual or non-ASCII characters,
    # so such paths would never match a CODEOWNERS pattern.
    output = subprocess.check_output(['git', 'ls-files', '-z'], cwd=IDF_PATH).decode('utf-8')
    # Every entry is followed by a terminator, so splitting leaves an empty trailing item.
    return [path for path in output.split('\0') if path]
|
2020-09-12 07:30:09 -04:00
|
|
|
|
|
|
|
|
|
|
|
def pattern_to_regex(pattern):
    """
    Convert the CODEOWNERS path pattern into a regular expression string.

    The pattern is first normalized: a pattern not starting with '/' may match at any
    directory depth ('/**/' is prepended), and a pattern ending with '/' matches everything
    below that directory ('**/*' is appended). The glob syntax is then translated:

        '**/'  any number of leading directories, including none
        '*'    any run of characters other than '/'
        '?'    any single character
        '.'    a literal dot

    Characters that are special in a regular expression but not in a glob
    ('+', '(', ')', '{', '}', '^', '$', '|') are matched literally.

    Returns the regular expression as a string, anchored at the end with '$' and, when it
    starts with '/', at the beginning with '^'.

    Raises ValueError if the pattern contains '**' that is not followed by '/'.
    """
    orig_pattern = pattern  # for printing errors later

    # Replicates the logic from normalize_pattern function in Gitlab ee/lib/gitlab/code_owners/file.rb:
    if not pattern.startswith('/'):
        pattern = '/**/' + pattern
    if pattern.endswith('/'):
        pattern = pattern + '**/*'

    # Escape regex metacharacters which have no special meaning in a glob; left as they are,
    # a path such as 'c++' produces a regex that fails to compile or matches the wrong files.
    # Characters already preceded by a backslash are left alone.
    pattern = re.sub(r'(?<!\\)([+(){}^$|])', r'\\\1', pattern)

    # Convert the glob pattern into a regular expression:
    # first into intermediate tokens, so that the regex syntax inserted for one wildcard
    # is not rewritten again by the replacements that follow
    pattern = (pattern.replace('**/', ':REGLOB:')
                      .replace('**', ':INVALID:')
                      .replace('*', ':GLOB:')
                      .replace('.', ':DOT:')
                      .replace('?', ':ANY:'))

    if ':INVALID:' in pattern:
        raise ValueError("Likely invalid pattern '{}': '**' should be followed by '/'".format(orig_pattern))

    # then into the final regex pattern:
    re_pattern = (pattern.replace(':REGLOB:', '(?:.*/)?')
                         .replace(':GLOB:', '[^/]*')
                         .replace(':DOT:', '[.]')
                         .replace(':ANY:', '.') + '$')
    if re_pattern.startswith('/'):
        re_pattern = '^' + re_pattern

    return re_pattern
|
|
|
|
|
|
|
|
|
|
|
|
def files_by_regex(all_files, regex):
    """
    Select the files from all_files whose path matches the compiled regular expression.

    The original order of all_files is preserved in the result.
    """
    matched = []
    for path in all_files:
        # regexes built by pattern_to_regex are anchored at a leading '/', so add one to the path
        if regex.search('/' + path):
            matched.append(path)
    return matched
|
|
|
|
|
|
|
|
|
|
|
|
def files_by_pattern(all_files, pattern=None):
    """
    Filter all_files down to those matching the given CODEOWNERS pattern.

    When no pattern is given (None or empty), all_files is returned unchanged.
    """
    if pattern:
        compiled = re.compile(pattern_to_regex(pattern))
        return files_by_regex(all_files, compiled)

    return all_files
|
|
|
|
|
|
|
|
|
2020-07-24 12:51:34 -04:00
|
|
|
def action_identify(args):
    """
    Print the owners of args.path, one per line, according to the CODEOWNERS file.

    When several lines match the path, the owners of the last matching line are printed.
    Nothing is printed if no line matches, or if the path is not in the list returned by
    get_all_files().
    """
    best_match = []
    # A path which git doesn't list can't be matched by any line. Resolving that once lets
    # every pattern be tested against a single path instead of the whole file list.
    candidates = [args.path] if args.path in get_all_files() else []
    with open(CODEOWNERS_PATH) as f:
        for line in f:
            line = line.strip()
            # Skip empty lines, comments and section headers ("[name]" / "^[name]"),
            # the same lines which action_ci_check skips: none of them is a path rule.
            if not line or line.startswith(('#', '[', '^[')):
                continue
            tokens = line.split()
            path_pattern = tokens[0]
            owners = tokens[1:]
            if files_by_pattern(candidates, path_pattern):
                best_match = owners  # a later matching line overrides earlier ones

    for owner in best_match:
        print(owner)
|
|
|
|
|
|
|
|
|
2020-09-12 07:30:09 -04:00
|
|
|
def action_test_pattern(args):
    """
    Show what the CODEOWNERS pattern args.pattern expands to.

    With args.regex set, only the equivalent regular expression is printed. Otherwise every
    repository file matching the pattern is printed, one per line.
    """
    regex_text = pattern_to_regex(args.pattern)

    if not args.regex:
        repo_files = get_all_files()
        compiled = re.compile(regex_text)
        for matched_path in files_by_regex(repo_files, compiled):
            print(matched_path)
        return

    print(regex_text)
|
2020-07-24 12:51:34 -04:00
|
|
|
|
|
|
|
|
|
|
|
def action_ci_check(args):
    """
    Check the CODEOWNERS file and report every problem found.

    For each rule line (empty lines, comments and section headers are skipped) verify that:
      - at least one owner is listed;
      - the path pattern is in order relative to the previous one (see in_order); a comment
        line starting with '# sort-order-reset' restarts the ordering;
      - the path pattern is valid and matches at least one file returned by get_all_files();
      - every owner starts with CODEOWNER_GROUP_PREFIX.

    Prints 'No errors found.' on success. Otherwise prints the errors, each prefixed with
    the CODEOWNERS path and line number, and raises SystemExit(1).

    args is not used by this action.
    """
    errors = []

    def add_error(msg):
        # line_no is looked up in the enclosing function when the error is recorded
        errors.append('{}:{}: {}'.format(CODEOWNERS_PATH, line_no, msg))

    all_files = get_all_files()
    prev_path_pattern = ''
    with open(CODEOWNERS_PATH) as f:
        for line_no, line in enumerate(f, start=1):
            line = line.strip()
            # This marker is itself a comment, so it is handled before comments get skipped
            if line.startswith('# sort-order-reset'):
                prev_path_pattern = ''

            # Skip empty lines, comments and section headers
            if (not line
                    or line.startswith('#')  # comment
                    or line.startswith('[')  # file group
                    or line.startswith('^[')):  # optional file group
                continue

            # Each line has a form of "<path> <owners>+"
            tokens = line.split()
            path_pattern = tokens[0]
            owners = tokens[1:]
            if not owners:
                add_error('no owners specified for {}'.format(path_pattern))

            # Check that the file is sorted by path patterns
            if not in_order(prev_path_pattern, path_pattern):
                add_error('file is not sorted: {} < {}'.format(path_pattern, prev_path_pattern))
            prev_path_pattern = path_pattern

            # Check that the pattern matches at least one file. A malformed pattern is
            # reported like any other error, so the remaining lines still get checked.
            try:
                files = files_by_pattern(all_files, path_pattern)
            except (ValueError, re.error) as e:
                add_error('invalid pattern {}: {}'.format(path_pattern, e))
            else:
                if not files:
                    add_error('no files matched by pattern {}'.format(path_pattern))

            for o in owners:
                # Sanity-check the owner group name
                if not o.startswith(CODEOWNER_GROUP_PREFIX):
                    add_error("owner {} doesn't start with {}".format(o, CODEOWNER_GROUP_PREFIX))

    if not errors:
        print('No errors found.')
    else:
        print('Errors found!')
        for e in errors:
            print(e)
        raise SystemExit(1)
|
|
|
|
|
|
|
|
|
2021-07-11 21:27:22 -04:00
|
|
|
def in_order(prev, current):
    """
    Tell whether 'current' may follow 'prev' in the CODEOWNERS file.

    Entries are expected in alphabetical order, but the order of lines also carries meaning
    in the codeowners syntax (the last matching line has priority). For that reason a
    wildcard entry is accepted on either side of a more specific one: placed first it acts
    as a catch-all fallback, placed second it overrides the match of the previous line
    (i.e. '/xyz/**/*.py' overriding the owner of the Python files inside /xyz/).

    Returns True if the order is acceptable, which includes the case of an empty 'prev'
    (first entry in the file).
    """
    if not prev:
        return True  # nothing to compare with yet

    separators = '-_/'  # treated as equal to each other for ordering purposes
    wildcards = '?*'

    # walk both strings in lockstep until they diverge
    for left, right in zip(prev, current):
        if left in separators and right in separators:
            continue
        if left in wildcards or right in wildcards:
            # same up to the point where one of them has a wildcard: accept either order
            return True
        if left != right:
            return right > left

    # one string is a prefix of the other (modulo separators): the longer one goes last
    return len(prev) <= len(current)
|
|
|
|
|
|
|
|
|
2020-07-24 12:51:34 -04:00
|
|
|
def main():
    """
    Command-line entry point: parse the arguments and run the selected action.

    Three sub-commands are defined: 'identify', 'ci-check' and 'test-pattern'. The chosen
    one is dispatched to the module-level function named 'action_<sub-command>', with dashes
    replaced by underscores. When no sub-command is given, the help text is printed and the
    program exits with status 1.
    """
    parser = argparse.ArgumentParser(
        sys.argv[0], description='Internal helper script for working with the CODEOWNERS file.'
    )
    subparsers = parser.add_subparsers(dest='action')

    identify = subparsers.add_parser(
        'identify',
        # the two literals are concatenated, so the first one has to end with a space
        help='List the owners of the specified path within IDF. '
        "This command doesn't support files inside submodules, or files not added to git repository.",
    )
    identify.add_argument('path', help='Path of the file relative to the root of the repository')

    subparsers.add_parser(
        'ci-check',
        help='Check CODEOWNERS file: every line should match at least one file, sanity-check group names, '
        'check that the file is sorted by paths',
    )

    test_pattern = subparsers.add_parser(
        'test-pattern',
        help='Print files in the repository for a given CODEOWNERS pattern. Useful when adding new rules.'
    )
    test_pattern.add_argument('--regex', action='store_true', help='Print the equivalent regular expression instead of the file list.')
    test_pattern.add_argument('pattern', help='Path pattern to get the list of files for')

    args = parser.parse_args()

    if args.action is None:
        parser.print_help()
        parser.exit(1)

    # e.g. 'ci-check' -> action_ci_check
    action_func_name = 'action_' + args.action.replace('-', '_')
    action_func = globals()[action_func_name]
    action_func(args)
|
|
|
|
|
|
|
|
|
2021-01-25 21:49:01 -05:00
|
|
|
# Run the command-line interface only when the file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
|