diff options
Diffstat (limited to 'utils/checkstyle.py')
-rwxr-xr-x | utils/checkstyle.py | 236 |
1 files changed, 207 insertions, 29 deletions
diff --git a/utils/checkstyle.py b/utils/checkstyle.py index f0248d65..88078a61 100755 --- a/utils/checkstyle.py +++ b/utils/checkstyle.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0-or-later # Copyright (C) 2018, Google Inc. # @@ -168,6 +168,12 @@ def parse_diff(diff): hunk = DiffHunk(line) elif hunk is not None: + # Work around https://github.com/python/cpython/issues/46395 + # See https://www.gnu.org/software/diffutils/manual/html_node/Incomplete-Lines.html + if line[-1] != '\n': + hunk.append(line + '\n') + line = '\\ No newline at end of file\n' + hunk.append(line) if hunk: @@ -191,6 +197,9 @@ class CommitFile: else: self.__filename = info[1] + def __repr__(self): + return f'{self.__status} {self.__filename}' + @property def filename(self): return self.__filename @@ -203,16 +212,30 @@ class CommitFile: class Commit: def __init__(self, commit): self.commit = commit + self._trailers = [] self._parse() + def _parse_trailers(self, lines): + for index in range(1, len(lines)): + line = lines[index] + if not line: + break + + self._trailers.append(line) + + return index + def _parse(self): # Get the commit title and list of files. - ret = subprocess.run(['git', 'show', '--pretty=oneline', '--name-status', + ret = subprocess.run(['git', 'show', '--format=%s%n%(trailers:only,unfold)', '--name-status', self.commit], stdout=subprocess.PIPE).stdout.decode('utf-8') - files = ret.splitlines() - self._files = [CommitFile(f) for f in files[1:]] - self._title = files[0] + lines = ret.splitlines() + + self._title = lines[0] + + index = self._parse_trailers(lines) + self._files = [CommitFile(f) for f in lines[index:] if f] def files(self, filter='AMR'): return [f.filename for f in self._files if f.status in filter] @@ -221,6 +244,10 @@ class Commit: def title(self): return self._title + @property + def trailers(self): + return self._trailers + def get_diff(self, top_level, filename): diff = subprocess.run(['git', 'diff', '%s~..%s' % (self.commit, self.commit), '--', '%s/%s' % (top_level, filename)], @@ -249,15 +276,21 @@ class StagedChanges(Commit): return parse_diff(diff.splitlines(True)) -class Amendment(StagedChanges): +class Amendment(Commit): def __init__(self): - StagedChanges.__init__(self) + Commit.__init__(self, '') def _parse(self): - # Create a title using HEAD commit - ret = subprocess.run(['git', 'show', '--pretty=oneline', '--no-patch'], + # Create a title using HEAD commit and parse the trailers. + ret = subprocess.run(['git', 'show', '--format=%H %s%n%(trailers:only,unfold)', + '--no-patch'], stdout=subprocess.PIPE).stdout.decode('utf-8') - self._title = 'Amendment of ' + ret.strip() + lines = ret.splitlines() + + self._title = 'Amendment of ' + lines[0].strip() + + self._parse_trailers(lines) + # Extract the list of modified files ret = subprocess.run(['git', 'diff', '--staged', '--name-status', 'HEAD~'], stdout=subprocess.PIPE).stdout.decode('utf-8') @@ -298,8 +331,10 @@ class CommitChecker(metaclass=ClassRegistry): # Class methods # @classmethod - def checkers(cls): + def checkers(cls, names): for checker in cls.subclasses: + if names and checker.__name__ not in names: + continue yield checker @@ -313,7 +348,7 @@ class HeaderAddChecker(CommitChecker): def check(cls, commit, top_level): issues = [] - meson_files = [f for f in commit.files('M') + meson_files = [f for f in commit.files() if os.path.basename(f) == 'meson.build'] for filename in commit.files('AR'): @@ -352,6 +387,137 @@ class HeaderAddChecker(CommitChecker): return issues +class TitleChecker(CommitChecker): + prefix_regex = re.compile(r'^([a-zA-Z0-9_.-]+: )+') + release_regex = re.compile(r'libcamera v[0-9]+\.[0-9]+\.[0-9]+') + + @classmethod + def check(cls, commit, top_level): + title = commit.title + + # Skip the check when validating staged changes (as done through a + # pre-commit hook) as there is no title to check in that case. + if isinstance(commit, StagedChanges): + return [] + + # Ignore release commits, they don't need a prefix. + if TitleChecker.release_regex.fullmatch(title): + return [] + + prefix_pos = title.find(': ') + if prefix_pos != -1 and prefix_pos != len(title) - 2: + return [] + + # Find prefix candidates by searching the git history + msgs = subprocess.run(['git', 'log', '--no-decorate', '--oneline', '-n100', '--'] + commit.files(), + stdout=subprocess.PIPE).stdout.decode('utf-8') + prefixes = {} + prefixes_count = 0 + for msg in msgs.splitlines(): + prefix = TitleChecker.prefix_regex.match(msg) + if not prefix: + continue + + prefix = prefix.group(0) + if prefix in prefixes: + prefixes[prefix] += 1 + else: + prefixes[prefix] = 1 + + prefixes_count += 1 + + if not prefixes: + return [CommitIssue('Commit title is missing prefix')] + + # Sort the candidates by number of occurrences and pick the best ones. + # When multiple prefixes are possible without a clear winner, we want to + # display the most common options to the user, but without the most + # unlikely options to avoid too long messages. As a heuristic, select + # enough candidates to cover at least 2/3 of the possible prefixes, but + # never more than 4 candidates. + prefixes = list(prefixes.items()) + prefixes.sort(key=lambda x: x[1], reverse=True) + + candidates = [] + candidates_count = 0 + for prefix in prefixes: + candidates.append(f"`{prefix[0]}'") + candidates_count += prefix[1] + if candidates_count >= prefixes_count * 2 / 3 or \ + len(candidates) == 4: + break + + candidates = candidates[:-2] + [' or '.join(candidates[-2:])] + candidates = ', '.join(candidates) + + return [CommitIssue('Commit title is missing prefix, ' + 'possible candidates are ' + candidates)] + + +class TrailersChecker(CommitChecker): + commit_regex = re.compile(r'[0-9a-f]{12}[0-9a-f]* \(".*"\)') + + coverity_regex = re.compile(r'Coverity CID=.*') + + # Simple e-mail address validator regex, with an additional trailing + # comment. The complexity of a full RFC6531 validator isn't worth the + # additional invalid addresses it would reject. + email_regex = re.compile(r'[^<]+ <[^@>]+@[^>]+>( # .*)?') + + link_regex = re.compile(r'https?://.*') + + @staticmethod + def validate_reported_by(value): + if TrailersChecker.email_regex.fullmatch(value): + return True + if TrailersChecker.coverity_regex.fullmatch(value): + return True + return False + + known_trailers = { + 'Acked-by': email_regex, + 'Bug': link_regex, + 'Co-developed-by': email_regex, + 'Fixes': commit_regex, + 'Link': link_regex, + 'Reported-by': validate_reported_by, + 'Reviewed-by': email_regex, + 'Signed-off-by': email_regex, + 'Suggested-by': email_regex, + 'Tested-by': email_regex, + } + + trailer_regex = re.compile(r'([A-Z][a-zA-Z-]*)\s*:\s*(.*)') + + @classmethod + def check(cls, commit, top_level): + issues = [] + + for trailer in commit.trailers: + match = TrailersChecker.trailer_regex.fullmatch(trailer) + if not match: + issues.append(CommitIssue(f"Malformed commit trailer '{trailer}'")) + continue + + key, value = match.groups() + + validator = TrailersChecker.known_trailers.get(key) + if not validator: + issues.append(CommitIssue(f"Invalid commit trailer key '{key}'")) + continue + + if isinstance(validator, re.Pattern): + valid = bool(validator.fullmatch(value)) + else: + valid = validator(value) + + if not valid: + issues.append(CommitIssue(f"Malformed value '{value}' for commit trailer '{key}'")) + continue + + return issues + + # ------------------------------------------------------------------------------ # Style Checkers # @@ -366,8 +532,10 @@ class StyleChecker(metaclass=ClassRegistry): # Class methods # @classmethod - def checkers(cls, filename): + def checkers(cls, filename, names): for checker in cls.subclasses: + if names and checker.__name__ not in names: + continue if checker.supports(filename): yield checker @@ -401,7 +569,7 @@ class IncludeChecker(StyleChecker): 'limits', 'locale', 'setjmp', 'signal', 'stdarg', 'stddef', 'stdint', 'stdio', 'stdlib', 'string', 'time', 'uchar', 'wchar', 'wctype') - include_regex = re.compile('^#include <c([a-z]*)>') + include_regex = re.compile(r'^#include <c([a-z]*)>') def __init__(self, content): super().__init__() @@ -427,7 +595,7 @@ class IncludeChecker(StyleChecker): class LogCategoryChecker(StyleChecker): - log_regex = re.compile('\\bLOG\((Debug|Info|Warning|Error|Fatal)\)') + log_regex = re.compile(r'\bLOG\((Debug|Info|Warning|Error|Fatal)\)') patterns = ('*.cpp',) def __init__(self, content): @@ -464,7 +632,7 @@ class MesonChecker(StyleChecker): class Pep8Checker(StyleChecker): patterns = ('*.py',) - results_regex = re.compile('stdin:([0-9]+):([0-9]+)(.*)') + results_regex = re.compile(r'stdin:([0-9]+):([0-9]+)(.*)') def __init__(self, content): super().__init__() @@ -497,7 +665,7 @@ class Pep8Checker(StyleChecker): class ShellChecker(StyleChecker): patterns = ('*.sh',) - results_line_regex = re.compile('In - line ([0-9]+):') + results_line_regex = re.compile(r'In - line ([0-9]+):') def __init__(self, content): super().__init__() @@ -547,8 +715,10 @@ class Formatter(metaclass=ClassRegistry): # Class methods # @classmethod - def formatters(cls, filename): + def formatters(cls, filename, names): for formatter in cls.subclasses: + if names and formatter.__name__ not in names: + continue if formatter.supports(filename): yield formatter @@ -583,7 +753,8 @@ class CLangFormatter(Formatter): class DoxygenFormatter(Formatter): patterns = ('*.c', '*.cpp') - return_regex = re.compile(' +\\* +\\\\return +[a-z]') + oneliner_regex = re.compile(r'^ +\* +\\(brief|param|return)\b.*\.$') + return_regex = re.compile(r' +\* +\\return +[a-z]') @classmethod def format(cls, filename, data): @@ -598,6 +769,7 @@ class DoxygenFormatter(Formatter): lines.append(line) continue + line = cls.oneliner_regex.sub(lambda m: m.group(0)[:-1], line) line = cls.return_regex.sub(lambda m: m.group(0)[:-1] + m.group(0)[-1].upper(), line) if line.find('*/') != -1: @@ -643,7 +815,7 @@ class DPointerFormatter(Formatter): class IncludeOrderFormatter(Formatter): patterns = ('*.cpp', '*.h') - include_regex = re.compile('^#include (["<])([^">]*)([">])') + include_regex = re.compile(r'^#include (["<])([^">]*)([">])') @classmethod def format(cls, filename, data): @@ -710,7 +882,7 @@ class StripTrailingSpaceFormatter(Formatter): # Style checking # -def check_file(top_level, commit, filename): +def check_file(top_level, commit, filename, checkers): # Extract the line numbers touched by the commit. commit_diff = commit.get_diff(top_level, filename) @@ -727,7 +899,7 @@ def check_file(top_level, commit, filename): after = commit.get_file(filename) formatted = after - for formatter in Formatter.formatters(filename): + for formatter in Formatter.formatters(filename, checkers): formatted = formatter.format(filename, formatted) after = after.splitlines(True) @@ -741,7 +913,7 @@ def check_file(top_level, commit, filename): # Check for code issues not related to formatting. issues = [] - for checker in StyleChecker.checkers(filename): + for checker in StyleChecker.checkers(filename, checkers): checker = checker(after) for hunk in commit_diff: issues += checker.check(hunk.side('to').touched) @@ -769,16 +941,17 @@ def check_file(top_level, commit, filename): return len(formatted_diff) + len(issues) -def check_style(top_level, commit): - separator = '-' * len(commit.title) +def check_style(top_level, commit, checkers): + title = commit.commit + ' ' + commit.title + separator = '-' * len(title) print(separator) - print(commit.title) + print(title) print(separator) issues = 0 # Apply the commit checkers first. - for checker in CommitChecker.checkers(): + for checker in CommitChecker.checkers(checkers): for issue in checker.check(commit, top_level): print('%s%s%s' % (Colours.fg(Colours.Yellow), issue.msg, Colours.reset())) issues += 1 @@ -790,7 +963,7 @@ def check_style(top_level, commit): files = [f for f in commit.files() if len([p for p in patterns if fnmatch.fnmatch(os.path.basename(f), p)])] for f in files: - issues += check_file(top_level, commit, f) + issues += check_file(top_level, commit, f, checkers) if issues == 0: print('No issue detected') @@ -840,6 +1013,8 @@ def main(argv): # Parse command line arguments parser = argparse.ArgumentParser() + parser.add_argument('--checkers', '-c', type=str, + help='Specify which checkers to run as a comma-separated list. Defaults to all checkers') parser.add_argument('--staged', '-s', action='store_true', help='Include the changes in the index. Defaults to False') parser.add_argument('--amend', '-a', action='store_true', @@ -848,6 +1023,9 @@ def main(argv): help='Revision range (as defined by git rev-parse). Defaults to HEAD if not specified.') args = parser.parse_args(argv[1:]) + if args.checkers: + args.checkers = args.checkers.split(',') + # Check for required dependencies. for command, mandatory in dependencies.items(): found = shutil.which(command) @@ -881,7 +1059,7 @@ def main(argv): issues = 0 for commit in commits: - issues += check_style(top_level, commit) + issues += check_style(top_level, commit, args.checkers) print('') if issues: |