diff options
author | Kieran Bingham <kieran.bingham@ideasonboard.com> | 2021-07-12 12:48:13 +0100 |
---|---|---|
committer | Kieran Bingham <kieran.bingham@ideasonboard.com> | 2021-07-12 12:48:13 +0100 |
commit | d1a6438a6e4ce92fa85e9fcd31dfad2b340d5d62 (patch) | |
tree | 18f4a8559c817514c518ea47b5fbf8b475b9fd9c | |
parent | d0a4e15ceb53742a6f4e44b8ca39b87e501b3f22 (diff) |
ci: Add patchwork bot on libcamera tree
Signed-off-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
-rwxr-xr-x | 03-update-patchwork.sh | 17 | ||||
-rwxr-xr-x | scripts/git-patchwork-bot.py | 1386 |
2 files changed, 1403 insertions, 0 deletions
diff --git a/03-update-patchwork.sh b/03-update-patchwork.sh new file mode 100755 index 0000000..43295a6 --- /dev/null +++ b/03-update-patchwork.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# SPDX-License-Identifier: GPL-2.0-or-later + +source ./common.sh + +LIBCAMERA=${1:-$(pwd)/libcamera} +BOT=$(pwd)/scripts/git-patchwork-bot.py + +ID=patchwork-bot + +logfile=$(log_filename $ID) + +$BOT -v -r $LIBCAMERA/.git -l $logfile > $logfile.stdout.log 2> $logfile.stderr.log + +pass_fail $? "Run patchwork bot" + diff --git a/scripts/git-patchwork-bot.py b/scripts/git-patchwork-bot.py new file mode 100755 index 0000000..e32fef4 --- /dev/null +++ b/scripts/git-patchwork-bot.py @@ -0,0 +1,1386 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# This bot automatically recognizes when patchwork-tracked patches +# are applied to git repositories and marks them as "Accepted." It can +# additionally send mail notifications to the maintainers and to the +# patch submitters. +# +# It runs from a cronjob, but can be also run from post-update hooks with +# extra wrappers. For more details, consult: +# +# https://korg.wiki.kernel.org/userdoc/pwbot +# +# +from __future__ import (absolute_import, + division, + print_function, + unicode_literals) + +__author__ = 'Konstantin Ryabitsev <konstantin@linuxfoundation.org>' + +import os +import sys +import argparse +import smtplib +import subprocess +import sqlite3 +import logging +import hashlib +import re +import requests +import datetime +import time +import random + +from email.mime.text import MIMEText +from email.header import Header +from email.utils import formatdate, getaddresses + +from fcntl import lockf, LOCK_EX, LOCK_NB + +try: + import xmlrpclib +except ImportError: + # Python 3 has merged/renamed things. + import xmlrpc.client as xmlrpclib + +# Send all email 8-bit, this is not 1999 +from email import charset +charset.add_charset('utf-8', charset.SHORTEST, '8bit') + +DB_VERSION = 1 +REST_API_VERSION = '1.1' +HUNK_RE = re.compile(r'^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@') +FILENAME_RE = re.compile(r'^(---|\+\+\+) (\S+)') +REST_PER_PAGE = 50 + +_project_cache = None + +#logging.basicConfig(level=logging.INFO) +#logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger('gitpwcron') + +# Lifted from patchwork pwclient +class Transport(xmlrpclib.SafeTransport): + + def __init__(self, url): + xmlrpclib.SafeTransport.__init__(self) + self.credentials = None + self.host = None + self.proxy = None + self.scheme = url.split('://', 1)[0] + self.https = url.startswith('https') + if self.https: + self.proxy = os.environ.get('https_proxy') + else: + self.proxy = os.environ.get('http_proxy') + if self.proxy: + self.https = self.proxy.startswith('https') + + def set_credentials(self, username=None, password=None): + self.credentials = '%s:%s' % (username, password) + + def make_connection(self, host): + self.host = host + if self.proxy: + host = self.proxy.split('://', 1)[-1].rstrip('/') + if self.credentials: + host = '@'.join([self.credentials, host]) + if self.https: + return xmlrpclib.SafeTransport.make_connection(self, host) + else: + return xmlrpclib.Transport.make_connection(self, host) + + if sys.version_info[0] == 2: + # Python 2 + # noinspection PyArgumentList,PyMethodOverriding + def send_request(self, connection, handler, request_body): + handler = '%s://%s%s' % (self.scheme, self.host, handler) + xmlrpclib.Transport.send_request(self, connection, handler, + request_body) + else: + # Python 3 + def send_request(self, host, handler, request_body, debug): + handler = '%s://%s%s' % (self.scheme, host, handler) + print("Handler: " + handler) + return xmlrpclib.Transport.send_request(self, host, handler, + request_body, debug) + + +class Restmaker: + def __init__(self, server, settings): + self.server = server + self.url = '/'.join((server.rstrip('/'), 'api', REST_API_VERSION)) + self.headers = { + 'User-Agent': 'git-patchwork-bot', + } + # As long as the REST api does not expose filtering by hash, we have to use + # user/pass authentication for xmlrpc purposes. We'll implement token + # authentication when that stops being the case. + self.auth = requests.auth.HTTPBasicAuth(settings['user'], settings['pass']) + + self.series_url = '/'.join((self.url, 'series')) + self.patches_url = '/'.join((self.url, 'patches')) + self.covers_url = '/'.join((self.url, 'covers')) + + # Simple local cache + self._patches = dict() + + def get_cover(self, cover_id): + try: + logger.debug('Grabbing cover %d', cover_id) + url = '/'.join((self.covers_url, str(cover_id), '')) + logger.debug('url=%s', url) + rsp = requests.get(url, auth=self.auth, headers=self.headers, + params=list(), stream=False) + rsp.raise_for_status() + return rsp.json() + except requests.exceptions.RequestException as ex: + logger.info('REST error: %s', ex) + return None + + def get_patch(self, patch_id): + if patch_id not in self._patches: + try: + logger.debug('Grabbing patch %d', patch_id) + url = '/'.join((self.patches_url, str(patch_id), '')) + logger.debug('url=%s', url) + rsp = requests.get(url, auth=self.auth, headers=self.headers, + params=list(), stream=False) + rsp.raise_for_status() + self._patches[patch_id] = rsp.json() + except requests.exceptions.RequestException as ex: + logger.info('REST error: %s', ex) + self._patches[patch_id] = None + + return self._patches[patch_id] + + def get_series(self, series_id): + try: + logger.debug('Grabbing series %d', series_id) + url = '/'.join((self.series_url, str(series_id), '')) + logger.debug('url=%s', url) + rsp = requests.get(url, auth=self.auth, headers=self.headers, + params=list(), stream=False) + rsp.raise_for_status() + except requests.exceptions.RequestException as ex: + logger.info('REST error: %s', ex) + return None + + return rsp.json() + + def get_patch_list(self, params): + try: + logger.debug('Grabbing patch list with params=%s', params) + rsp = requests.get(self.patches_url, auth=self.auth, headers=self.headers, + params=params, stream=False) + rsp.raise_for_status() + except requests.exceptions.RequestException as ex: + logger.info('REST error: %s', ex) + return None + + return rsp.json() + + def get_series_list(self, params): + try: + logger.debug('Grabbing series with params=%s', params) + rsp = requests.get(self.series_url, auth=self.auth, headers=self.headers, + params=params, stream=False) + rsp.raise_for_status() + except requests.exceptions.RequestException as ex: + logger.info('REST error: %s', ex) + return None + + return rsp.json() + + def update_patch(self, patch_id, state=None, archived=False, commit_ref=None): + # Clear it out of the cache + if patch_id in self._patches: + del self._patches[patch_id] + + try: + logger.debug('Updating patch %d:', patch_id) + url = '/'.join((self.patches_url, str(patch_id), '')) + logger.debug('url=%s', url) + data = list() + if state is not None: + logger.debug(' state=%s', state) + data.append(('state', state)) + if archived: + logger.debug(' archived=True') + data.append(('archived', True)) + if commit_ref is not None: + logger.debug(' commit_ref=%s', commit_ref) + data.append(('commit_ref', commit_ref)) + + rsp = requests.patch(url, auth=self.auth, headers=self.headers, + data=data, stream=False) + rsp.raise_for_status() + except requests.exceptions.RequestException as ex: + logger.info('REST error: %s', ex) + return None + + return rsp.json() + + +# Python-2.7 doesn't have a domain= keyword argument, so steal make_msgid from python-3.2+ +def make_msgid(idstring=None, domain='kernel.org'): + timeval = int(time.time()*100) + pid = os.getpid() + randint = random.getrandbits(64) + if idstring is None: + idstring = '' + else: + idstring = '.' + idstring + + return '<%d.%d.%d%s@%s>' % (timeval, pid, randint, idstring, domain) + + +def get_patchwork_patches_by_project_id_hash(rpc, project_id, pwhash): + logger.debug('Looking up %s', pwhash) + try: + patches = rpc.patch_list({'project_id': project_id, 'hash': pwhash, 'archived': False}) + except xmlrpclib.Fault as ex: + logger.debug('Got a Fault: %s', ex.faultString) + return None + + if not patches: + logger.debug('No match for hash=%s', pwhash) + return None + + return [patch['id'] for patch in patches] + + +def get_patchwork_patches_by_project_id_name(rpc, project_id, name): + logger.debug('Looking up %s', name) + try: + patches = rpc.patch_list({'project_id': project_id, 'name': name, 'archived': False}) + except xmlrpclib.Fault as ex: + logger.debug('Got a Fault: %s', ex.faultString) + return None + + if not patches: + logger.debug('No match for name=%s', name) + return None + + return [patch['id'] for patch in patches] + + +def get_patchwork_patches_by_project_id_state(rpc, project_id, state): + logger.debug('Looking up %s patches', state) + try: + patches = rpc.patch_list({'project_id': project_id, 'state': state, 'archived': False}) + except xmlrpclib.Fault as ex: + logger.debug('Got a Fault: %s', ex.faultString) + return None + + if not patches: + logger.debug('No match for state=%s', state) + return None + + return [patch['id'] for patch in patches] + + + +def get_patchwork_pull_requests_by_project(rm, project, fromstate): + page = 0 + pagedata = list() + prs = list() + more = True + while True: + if not pagedata and more: + page += 1 + params = [ + ('project', project), + ('archived', 'false'), + ('state', fromstate), + ('order', '-date'), + ('page', page), + ('q', 'PULL'), + ('per_page', REST_PER_PAGE), + ] + logger.debug('Processing page %s', page) + + pagedata = rm.get_patch_list(params) + if not pagedata or len(pagedata) < REST_PER_PAGE: + more = False + + if not pagedata: + logger.debug('Finished processing all patches') + break + + entry = pagedata.pop() + pull_url = entry.get('pull_url') + if pull_url: + patch_id = entry.get('id') + logger.debug('Found pull request: %s (%s)', pull_url, patch_id) + chunks = pull_url.split() + pull_host = chunks[0] + if len(chunks) > 1: + pull_refname = chunks[1] + else: + pull_refname = 'master' + + prs.append((pull_host, pull_refname, patch_id)) + + return prs + + +def project_id_by_name(rpc, name): + if not name: + return 0 + + global _project_cache + + if _project_cache is None: + _project_cache = rpc.project_list('', 0) + + for project in _project_cache: + if project['linkname'].lower().startswith(name.lower()): + logger.debug('project lookup: linkname=%s, id=%d', name, project['id']) + return project['id'] + + return 0 + + +def db_save_meta(c): + c.execute('DELETE FROM meta') + c.execute('''INSERT INTO meta VALUES(?)''', (DB_VERSION,)) + + +def db_save_repo_heads(c, heads): + c.execute('DELETE FROM heads') + for refname, commit_id in heads: + c.execute('''INSERT INTO heads VALUES(?,?)''', (refname, commit_id)) + + +def db_get_repo_heads(c): + return c.execute('SELECT refname, commit_id FROM heads').fetchall() + + +def db_init_common_sqlite_db(c): + c.execute(''' + CREATE TABLE meta ( + version INTEGER + )''') + db_save_meta(c) + c.execute(''' + CREATE TABLE heads ( + refname TEXT, + commit_id TEXT + )''') + + +def db_init_pw_sqlite_db(c): + logger.info('Initializing new sqlite3 db with metadata version %s', DB_VERSION) + db_init_common_sqlite_db(c) + + +def git_get_command_lines(gitdir, args): + out = git_run_command(gitdir, args) + lines = list() + if out: + for line in out.split('\n'): + if line == '': + continue + lines.append(line) + + return lines + + +def git_run_command(gitdir, args, stdin=None): + args = ['git', '--no-pager', '--git-dir', gitdir] + args + + print(' '.join(args)) + + logger.debug('Running %s' % ' '.join(args)) + + if stdin is None: + (output, error) = subprocess.Popen(args, stdout=subprocess.PIPE, + stderr=subprocess.PIPE).communicate() + else: + pp = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + (output, error) = pp.communicate(input=stdin.encode('utf-8')) + + output = output.strip().decode('utf-8', errors='replace') + if len(error.strip()): + logger.debug('Stderr: %s', error.decode('utf-8', errors='replace')) + + return output + + +def git_get_repo_heads(gitdir): + refs = list() + lines = git_get_command_lines(gitdir, ['show-ref', '--heads']) + if lines is not None: + for line in lines: + (commit_id, refname) = line.split() + refs.append((refname, commit_id)) + return refs + + +def git_get_new_revs(gitdir, db_heads, git_heads, merges=False): + newrevs = dict() + for db_refrow in list(db_heads): + if db_refrow in git_heads: + logger.debug('No changes in %s', db_refrow[0]) + continue + + (refname, db_commit_id) = db_refrow + # Find a matching one in git + git_commit_id = None + for git_refrow in git_heads: + if git_refrow[0] == refname: + git_commit_id = git_refrow[1] + break + if git_commit_id is None: + # Looks like this head is gone from git + db_heads.remove(db_refrow) + continue + + if db_commit_id == git_commit_id: + # No changes in this head + continue + + rev_range = '%s..%s' % (db_commit_id, git_commit_id) + args = ['rev-list', '--pretty=oneline', '--reverse'] + if not merges: + args += ['--no-merges'] + + args += [rev_range, refname] + + lines = git_get_command_lines(gitdir, args) + if not lines: + continue + + newrevs[refname] = list() + + for line in lines: + (commit_id, logmsg) = line.split(' ', 1) + logger.debug('commit_id=%s, subject=%s', commit_id, logmsg) + newrevs[refname].append((commit_id, logmsg)) + + return newrevs + + +def git_get_rev_diff(gitdir, rev): + args = ['diff', '%s~..%s' % (rev, rev)] + return git_run_command(gitdir, args) + + +def git_get_patch_id(diff): + args = ['patch-id', '--stable'] + out = git_run_command('', args, stdin=diff) + logger.debug('out=%s', out) + if not out: + return None + return out.split()[0] + + +def git_log_check(gitdir, title): + args = ['log', '--oneline', '--grep', title] + out = git_run_command(gitdir, args) + if not out: + return False + + return True + + +def get_patchwork_hash(diff): + """Generate a hash from a diff. Lifted verbatim from patchwork.""" + + # normalise spaces + diff = diff.replace('\r', '') + diff = diff.strip() + '\n' + + prefixes = ['-', '+', ' '] + hashed = hashlib.sha1() + + for line in diff.split('\n'): + if len(line) <= 0: + continue + + hunk_match = HUNK_RE.match(line) + filename_match = FILENAME_RE.match(line) + + if filename_match: + # normalise -p1 top-directories + if filename_match.group(1) == '---': + filename = 'a/' + else: + filename = 'b/' + filename += '/'.join(filename_match.group(2).split('/')[1:]) + + line = filename_match.group(1) + ' ' + filename + elif hunk_match: + # remove line numbers, but leave line counts + def fn(x): + if not x: + return 1 + return int(x) + line_nos = list(map(fn, hunk_match.groups())) + line = '@@ -%d +%d @@' % tuple(line_nos) + elif line[0] in prefixes: + # if we have a +, - or context line, leave as-is + pass + else: + # other lines are ignored + continue + + hashed.update((line + '\n').encode('utf-8')) + + return hashed.hexdigest() + + +def get_config_from_repo(repo, regexp, cmdconfig): + config = dict() + args = ['config', '-z', '--local', '--get-regexp', regexp] + out = git_run_command(repo, args) + if not out: + return config + + for line in out.split('\x00'): + if not line: + continue + key, value = line.split('\n', 1) + try: + chunks = key.split('.') + ident = '.'.join(chunks[1:-1]) + if not ident: + ident = '*' + if ident not in config: + config[ident] = dict() + cfgkey = chunks[-1] + config[ident][cfgkey] = value + except ValueError: + logger.debug('Ignoring git config entry %s', line) + + if cmdconfig: + superconfig = dict() + for entry in cmdconfig: + key, value = entry.split('=', 1) + superconfig[key] = value + # add/override values with those passed from cmdline + for ident in config.keys(): + config[ident].update(superconfig) + + return config + + +def send_summary(serieslist, to_state, refname, config, nomail): + logger.info('Preparing summary') + # we send summaries by project, so the project name is going to be all the same + project = serieslist[0].get('project').get('link_name') + body = ( + 'Hello:\n\n' + 'The following patches were marked "%s", because they were applied to\n' + '%s (%s):\n' + ) % (to_state, config['treename'], refname) + + count = 0 + for sdata in serieslist: + count += 1 + logger.debug('Summarizing: %s', sdata.get('name')) + + # If we have a cover letter, then the reference is the msgid of the cover letter, + # else the reference is the msgid of the first patch + patches = sdata.get('patches') + + submitter = sdata.get('submitter') + body += '\n' + + if len(patches) == 1: + body += 'Patch: %s\n' % sdata.get('name') + else: + body += 'Series: %s\n' % sdata.get('name') + + body += ' Submitter: %s <%s>\n' % (submitter.get('name'), submitter.get('email')) + body += ' Patchwork: %s\n' % sdata.get('web_url') + + if sdata.get('cover_letter'): + link = sdata.get('cover_letter').get('msgid') + else: + link = patches[0].get('msgid') + body += ' Link: %s\n' % link + + if len(patches) > 1: + body += ' Patches: %s\n' % patches[0].get('name') + for patch in patches[1:]: + count += 1 + body += ' %s\n' % patch.get('name') + + body += '\nTotal patches: %d\n' % count + + body += '\n-- \nDeet-doot-dot, I am a bot.\nhttps://korg.wiki.kernel.org/userdoc/pwbot\n' + + msg = MIMEText(body.encode('utf-8'), _charset='utf-8') + msg.replace_header('Content-Transfer-Encoding', '8bit') + + msg['Subject'] = Header('Patchwork summary for: %s' % project, 'utf-8') + msg['From'] = Header(config['from'], 'utf-8') + msg['Message-Id'] = make_msgid('git-patchwork-summary') + msg['Date'] = formatdate(localtime=True) + + targets = config['summaryto'].split(',') + msg['To'] = Header(', '.join(targets), 'utf-8') + if 'alwayscc' in config: + msg['Cc'] = config['alwayscc'] + targets.append(config['alwayscc']) + if 'alwaysbcc' in config: + targets.append(config['alwaysbcc']) + + if not nomail: + logger.debug('Message follows') + logger.debug(msg.as_string().decode('utf-8')) + logger.info('Sending summary to: %s', msg['To']) + + smtp = smtplib.SMTP(config['mailhost']) + smtp.sendmail(msg['From'], targets, msg.as_string()) + smtp.close() + else: + logger.info('Would have sent the following:') + logger.info('------------------------------') + logger.info(msg.as_string().decode('utf-8')) + logger.info('------------------------------') + + return msg['Message-Id'] + + +def notify_submitters(rm, serieslist, refname, config, revs, nomail): + logger.info('Sending submitter notifications') + for sdata in serieslist: + # If we have a cover letter, then the reference is the msgid of the cover letter, + # else the reference is the msgid of the first patch + patches = sdata.get('patches') + is_pull_request = False + if sdata.get('cover_letter'): + reference = sdata.get('cover_letter').get('msgid') + fullcover = rm.get_cover(sdata.get('cover_letter').get('id')) + headers = fullcover.get('headers') + content = fullcover.get('content') + else: + reference = patches[0].get('msgid') + fullpatch = rm.get_patch(patches[0].get('id')) + headers = fullpatch.get('headers') + content = fullpatch.get('content') + if fullpatch.get('pull_url'): + is_pull_request = True + + submitter = sdata.get('submitter') + project = sdata.get('project') + + if 'neverto' in config: + neverto = config['neverto'].split(',') + if submitter.get('email') in neverto: + logger.debug('Skipping neverto address:%s', submitter.get('email')) + continue + + xpb = headers.get('X-Patchwork-Bot') + logger.debug('X-Patchwork-Bot=%s', xpb) + # If X-Patchwork-Bot header is set to "notify" we always notify + if xpb != 'notify': + # Use cc-based notification logic + ccs = [] + cchdr = headers.get('Cc') + if not cchdr: + cchdr = headers.get('cc') + if cchdr: + # Sometimes there are multiple cc headers returned + if not isinstance(cchdr, list): + cchdr = [cchdr] + ccs = [chunk[1] for chunk in getaddresses(cchdr)] + + if 'onlyifcc' in config: + match = None + for chunk in config['onlyifcc'].split(','): + if chunk.strip() in ccs: + match = chunk + break + if match is None: + logger.debug('Skipping %s due to onlyifcc=%s', submitter.get('email'), config['onlyifcc']) + continue + + if ccs and 'neverifcc' in config: + match = None + for chunk in config['neverifcc'].split(','): + if chunk.strip() in ccs: + match = chunk + break + if match is not None: + logger.debug('Skipping %s due to neverifcc=%s', submitter.get('email'), config['neverifcc']) + continue + + logger.debug('Preparing a notification for %s', submitter.get('email')) + if is_pull_request: + reqtype = 'pull request' + elif len(sdata.get('patches')) > 1: + reqtype = 'series' + else: + reqtype = 'patch' + + body = ( + 'Hello:\n\n' + 'This %s was applied to %s (%s).\n\n' + ) % (reqtype, config['treename'], refname) + body += 'On %s you wrote:\n' % headers.get('Date') + + if content: + qcount = 0 + for cline in content.split('\n'): + # Quote the first paragraph only and then [snip] if we quoted more than 5 lines + if qcount > 5 and (not len(cline.strip()) or cline.strip().find('---') == 0): + body += '> \n> [...]\n' + break + body += '> %s\n' % cline.rstrip() + qcount += 1 + body += '\n' + + body += '\nHere is a summary with links:\n' + + for patch in sdata.get('patches'): + body += ' - %s\n' % patch.get('name') + if 'commitlink' in config: + body += ' %s%s\n' % (config['commitlink'], revs[patch.get('id')]) + + body += ('\nYou are awesome, thank you!\n\n' + '-- \nDeet-doot-dot, I am a bot.\n' + 'https://korg.wiki.kernel.org/userdoc/pwbot\n') + + msg = MIMEText(body, _charset='utf-8') + msg.replace_header('Content-Transfer-Encoding', '8bit') + + msg['Subject'] = Header('Re: %s' % headers.get('Subject'), 'utf-8') + msg['From'] = Header(config['from'], 'utf-8') + msg['Message-Id'] = make_msgid('git-patchwork-notify') + msg['Date'] = formatdate(localtime=True) + msg['References'] = Header(reference, 'utf-8') + msg['In-Reply-To'] = Header(reference, 'utf-8') + + if 'onlyto' in config: + targets = [config['onlyto']] + msg['To'] = '%s <%s>' % (submitter.get('name'), config['onlyto']) + else: + targets = [submitter.get('email')] + msg['To'] = Header('%s <%s>' % (submitter.get('name'), submitter.get('email')), 'utf-8') + + if 'alwayscc' in config: + msg['Cc'] = config['alwayscc'] + targets += config['alwayscc'].split(',') + if 'alwaysbcc' in config: + targets += config['alwaysbcc'].split(',') + if 'cclist' in config and config['cclist'] == 'true': + targets.append(project.get('list_email')) + msg['Cc'] = project.get('list_email') + + if not nomail: + logger.debug('Message follows') + logger.debug(msg.as_string().decode('utf-8')) + logger.info('Notifying %s', submitter.get('email')) + + smtp = smtplib.SMTP(config['mailhost']) + smtp.sendmail(msg['From'], targets, msg.as_string()) + smtp.close() + else: + logger.info('Would have sent the following:') + logger.info('------------------------------') + logger.info(msg.as_string().decode('utf-8')) + logger.info('------------------------------') + + +def accept_by_gitlog(rm, settings, repo, nomail, dryrun): + for project in settings['projects'].split(','): + report = '' + project = project.strip() + + logger.info('Getting new patches from %s/%s', rm.server, project) + + page = 0 + pagedata = list() + maxPages = 2000 + count = 0 + sreport = list() + + while True: + if not pagedata: + page += 1 + logger.debug(' grabbing page %d', page) + params = [ + ('project', project), + ('order', '-date'), + ('page', page), + ('state', 'new'), + ('per_page', REST_PER_PAGE) + ] + pagedata = rm.get_patch_list(params) + + if not pagedata: + # Got them all? + logger.debug('Finished processing all series') + break + + if page > maxPages: + logger.debug('Finished processing after ' + str(maxPages) + ' pages') + break + + entry = pagedata.pop() + + patch_id = entry.get('id') + name = entry.get('name') + if name is None: + # Ignoring this one, because we must have a name + continue + + # Remove any [foo] from the front, for best matching. + # Usually, patchwork strips these, but not always. + name = re.sub(r'^\[.*?\]\s*', '', name) + + logger.debug(' Processing ' + name) + + #patch = rm.get_patch(patch_id) + # We already have 'the patch' + patch = entry + + state = patch.get('state') + if state != 'new': + logger.info(' Not a new patch, something went wrong') + continue + + # check to see if we think this patch is in the repo already. + logger.info(' Checking patch: %d', patch_id) + + if git_log_check(repo, name): + logger.info(' patch: %d: %s is in the log', patch_id, name) + sreport.append(' %s' % name) + count += 1 + if not dryrun: + rm.update_patch(patch_id, state='Accepted') + else: + logger.info(' Dryrun: Not actually setting state') + + if sreport: + report += 'Accepted ' + str(count) + ' patches' + report += '\n'.join(sreport) + report += '\n\n' + + if 'summaryto' not in settings: + logger.info('Report follows') + logger.info('------------------------------') + logger.info(report) + logger.info('------------------------------') + logger.debug('summaryto not set, not sending report') + continue + + +def housekeeping(rm, settings, nomail, dryrun): + logger.info('Running housekeeping in %s', rm.server) + hconfig = dict() + cutoffdays = 90 + + for chunk in settings['housekeeping'].split(','): + try: + key, val = chunk.split('=') + except ValueError: + logger.debug('Invalid housekeeping setting: %s', chunk) + continue + hconfig[key] = val + + for project in settings['projects'].split(','): + report = '' + project = project.strip() + + if 'autosupersede' in hconfig: + logger.info('Getting series from %s/%s', rm.server, project) + try: + cutoffdays = int(hconfig['autosupersede']) + except ValueError: + pass + + cutoffdate = datetime.datetime.now() - datetime.timedelta(days=cutoffdays) + logger.debug('cutoffdate=%s', cutoffdate) + series = dict() + page = 0 + pagedata = list() + while True: + if not pagedata: + page += 1 + logger.debug(' grabbing page %d', page) + params = [ + ('project', project), + ('order', '-date'), + ('page', page), + ('per_page', REST_PER_PAGE) + ] + pagedata = rm.get_series_list(params) + + if not pagedata: + # Got them all? + logger.debug('Finished processing all series') + break + + entry = pagedata.pop() + # Did we go too far back? + s_date = entry.get('date') + series_date = datetime.datetime.strptime(s_date, "%Y-%m-%dT%H:%M:%S") + if series_date < cutoffdate: + logger.debug('Went too far back, stopping at %s', series_date) + break + + s_id = entry.get('id') + s_name = entry.get('name') + if s_name is None: + # Ignoring this one, because we must have a name + continue + + # Remove any [foo] from the front, for best matching. + # Usually, patchwork strips these, but not always. + s_name = re.sub(r'^\[.*?\]\s*', '', s_name) + + ver = entry.get('version') + subm_id = entry.get('submitter').get('id') + patches = list() + for patch in entry.get('patches'): + patches.append(patch.get('id')) + + if not patches: + # Not sure how we can have a series without patches, but ok + continue + + received_all = entry.get('received_all') + if (subm_id, s_name) not in series: + series[(subm_id, s_name)] = dict() + + series[(subm_id, s_name)][series_date] = { + 'id': id, + 'patches': patches, + 'complete': received_all, + 'date': s_date, + 'rev': ver, + } + logger.debug('Processed id=%s (%s)', s_id, s_name) + + for key, items in series.items(): + if len(items) < 2: + # Not a redundant series + continue + + subm_id, name = key + versions = list(items.keys()) + versions.sort() + latest_version = versions.pop() + logger.debug('%s: latest_version: %s', name, items[latest_version]['date']) + if not items[latest_version]['complete']: + logger.debug('Skipping this series, because it is not complete') + continue + + sreport = list() + logger.info('Checking: [v%s] %s (%s)', items[latest_version]['rev'], name, + items[latest_version]['date']) + for v in versions: + rev = items[v]['rev'] + s_date = items[v]['date'] + patch_id = items[v]['patches'][0] + patch = rm.get_patch(patch_id) + state = patch.get('state') + if state != 'superseded': + logger.info(' Marking series as superseded: [v%s] %s (%s)', rev, name, s_date) + sreport.append(' Superseding: [v%s] %s (%s):' % (rev, name, s_date)) + # Yes, we need to supersede these patches + for patch_id in items[v]['patches']: + logger.info(' Superseding patch: %d', patch_id) + patch = rm.get_patch(patch_id) + patch_title = patch.get('name') + current_state = patch.get('state') + if current_state == 'superseded': + logger.info(' Patch already set to superseded, skipping') + continue + sreport.append(' %s' % patch_title) + if not dryrun: + rm.update_patch(patch_id, state='superseded') + else: + logger.info(' Dryrun: Not actually setting state') + + if sreport: + report += 'Latest series: [v%s] %s (%s)\n' % (items[latest_version]['rev'], name, + items[latest_version]['date']) + report += '\n'.join(sreport) + report += '\n\n' + + if 'autoarchive' in hconfig: + logger.info('Auto-archiving old patches in %s/%s', rm.server, project) + try: + cutoffdays = int(hconfig['autoarchive']) + except ValueError: + pass + + cutoffdate = datetime.datetime.now() - datetime.timedelta(days=cutoffdays) + logger.debug('cutoffdate=%s', cutoffdate) + + page = 0 + seen = set() + pagedata = list() + while True: + if not pagedata: + params = [ + ('project', project), + ('archived', 'false'), + ('state', 'new'), + ('order', 'date'), + ('per_page', REST_PER_PAGE) + ] + + if dryrun: + # We don't need pagination if we're not in dryrun, because + # once we archive the patches, they don't show up in this + # query any more. + page += 1 + params.append(('page', page)) + + pagedata = rm.get_patch_list(params) + + if not pagedata: + logger.debug('Finished processing all patches') + break + + entry = pagedata.pop() + # Did we go too far forward? + patch_date = datetime.datetime.strptime(entry.get('date'), "%Y-%m-%dT%H:%M:%S") + if patch_date >= cutoffdate: + logger.debug('Reached the cutoff date, stopping at %s', patch_date) + break + + patch_id = entry.get('id') + if patch_id in seen: + # If the archived setting isn't actually sticking on the server for + # some reason, then we are in for an infinite loop. Recognize this + # and quit when that happens. + logger.info('Setting to archived is not working, exiting loop.') + break + + seen.update([patch_id]) + patch_title = entry.get('name') + logger.info('Archiving: %s', patch_title) + if not dryrun: + rm.update_patch(patch_id, archived=True) + else: + logger.info(' Dryrun: Not actually archiving') + + if not report: + continue + + if 'summaryto' not in settings: + logger.info('Report follows') + logger.info('------------------------------') + logger.info(report) + logger.info('------------------------------') + logger.debug('summaryto not set, not sending report') + continue + + report += '\n-- \nDeet-doot-dot, I am a bot.\nhttps://korg.wiki.kernel.org/userdoc/pwbot\n' + + msg = MIMEText(report, _charset='utf-8') + msg.replace_header('Content-Transfer-Encoding', '8bit') + + msg['Subject'] = 'Patchwork housekeeping for: %s' % project + msg['From'] = settings['from'] + msg['Message-Id'] = make_msgid('git-patchwork-housekeeping') + msg['Date'] = formatdate(localtime=True) + + targets = settings['summaryto'].split(',') + msg['To'] = ', '.join(targets) + if 'alwayscc' in settings: + msg['Cc'] = settings['alwayscc'] + targets.append(settings['alwayscc']) + if 'alwaysbcc' in settings: + targets.append(settings['alwaysbcc']) + + if not nomail: + logger.debug('Message follows') + logger.debug(msg.as_string().decode('utf-8')) + logger.info('Sending housekeeping summary to: %s', msg['To']) + + smtp = smtplib.SMTP(settings['mailhost']) + smtp.sendmail(msg['From'], targets, msg.as_string()) + smtp.close() + else: + logger.info('Would have sent the following:') + logger.info('------------------------------') + logger.info(msg.as_string().decode('utf-8')) + logger.info('------------------------------') + + +def pwrun(repo, cmdconfig, nomail, dryrun): + if dryrun: + nomail = True + + git_heads = git_get_repo_heads(repo) + if not git_heads: + logger.info('Could not get the latest ref in %s', repo) + sys.exit(1) + + try: + lockfh = open(os.path.join(repo, '.pwrun.lock'), 'w') + lockf(lockfh, LOCK_EX | LOCK_NB) + except IOError: + logger.debug('Could not obtain an exclusive lock, assuming another process is running.') + return + + # Do we have a pw.db there yet? + dbpath = os.path.join(repo, 'pw.db') + db_exists = os.path.isfile(dbpath) + dbconn = sqlite3.connect(dbpath, sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES) + c = dbconn.cursor() + + if not db_exists: + db_init_pw_sqlite_db(c) + db_save_repo_heads(c, git_heads) + # Exit early + dbconn.commit() + return + + db_heads = db_get_repo_heads(c) + + newrevs = git_get_new_revs(repo, db_heads, git_heads, merges=True) + config = get_config_from_repo(repo, r'patchwork\..*', cmdconfig) + + global _project_cache + + for server, settings in config.items(): + _project_cache = None + print('Working on server %s', server) + print('Settings follow') + print(settings) + rm = Restmaker(server, settings) + + if not newrevs and 'housekeeping' in settings: + housekeeping(rm, settings, nomail, dryrun) + accept_by_gitlog(rm, settings, repo, nomail, dryrun) + return + + url = '%s/xmlrpc/' % server + + transport = Transport(url) + transport.set_credentials(settings['user'], settings['pass']) + + try: + rpc = xmlrpclib.Server(url, transport=transport) + except (IOError, OSError): + logger.info('Unable to connect to %s', url) + continue + + + print('Successfully got transport') + + # Generate the state map + statemap = dict() + for pair in settings['statemap'].split(','): + try: + refname, params = pair.split(':') + statemap[refname] = params.split('/') + except ValueError: + logger.info('Invalid statemap entry: %s', pair) + + logger.debug('statemap: %s', statemap) + + rpwhashes = dict() + rgithashes = dict() + have_prs = False + for refname, revlines in newrevs.items(): + if refname not in statemap: + # We don't care about this ref + continue + + rpwhashes[refname] = list() + logger.debug('Looking at %s', refname) + for rev, logline in revlines: + if logline.find('Merge') == 0 and logline.find('://') > 0: + have_prs = True + rpwhashes[refname].append((rev, logline, None)) + continue + diff = git_get_rev_diff(repo, rev) + pwhash = get_patchwork_hash(diff) + git_patch_id = git_get_patch_id(diff) + rgithashes[git_patch_id] = rev + if pwhash: + rpwhashes[refname].append((rev, logline, pwhash)) + + if 'fromstate' in settings: + fromstate = settings['fromstate'].split(',') + else: + fromstate = ['new', 'under-review'] + + logger.debug('fromstate=%s', fromstate) + + for project in settings['projects'].split(','): + count = 0 + project = project.strip() + logger.info('Processing "%s/%s"', server, project) + project_id = project_id_by_name(rpc, project) + + if have_prs: + logger.info('PR merge commit found, loading up pull requests') + prs = get_patchwork_pull_requests_by_project(rm, project, fromstate) + else: + prs = list() + + for refname, hashpairs in rpwhashes.items(): + logger.info('Analyzing %d revisions', len(hashpairs)) + # Patchwork lowercases state name and replaces spaces with dashes + to_state = statemap[refname][0].lower().replace(' ', '-') + + # We create patch_id->rev mapping first + revs = dict() + for rev, logline, pwhash in hashpairs: + if have_prs and pwhash is None: + matches = re.search(r'Merge\s(\S+)\s[\'\"](\S+)[\'\"]\sof\s(\w+://\S+)', logline) + if not matches: + continue + m_obj = matches.group(1) + m_refname = matches.group(2) + m_host = matches.group(3) + + logger.debug('Looking for %s %s %s', m_obj, m_refname, m_host) + + for pull_host, pull_refname, patch_id in prs: + if pull_host.find(m_host) > -1 and pull_refname.find(m_refname) > -1: + logger.debug('Found matching pull request in %s (id: %s)', logline, patch_id) + revs[patch_id] = rev + break + continue + + # Do we have a matching hash on the server? + logger.info('Matching: %s', logline) + # Theoretically, should only return one, but we play it safe and + # handle for multiple matches. + patch_ids = get_patchwork_patches_by_project_id_hash(rpc, project_id, pwhash) + if not patch_ids: + continue + + for patch_id in patch_ids: + pdata = rm.get_patch(patch_id) + if pdata.get('state') not in fromstate: + logger.debug('Ignoring patch_id=%d due to state=%s', patch_id, pdata.get('state')) + continue + revs[patch_id] = rev + + # Now we iterate through it + updated_series = list() + done_patches = set() + for patch_id in revs.copy().keys(): + if patch_id in done_patches: + # we've already updated this series + logger.debug('Already applied %d as part of previous series', patch_id) + continue + pdata = rm.get_patch(patch_id) + serieslist = pdata.get('series', None) + if not serieslist: + # This is probably from the time before patchwork-2 migration. + # We'll just ignore those. + logger.debug('A patch without an associated series? Woah.') + continue + + for series in serieslist: + series_id = series.get('id') + sdata = rm.get_series(series_id) + if not sdata.get('received_all'): + logger.debug('Series %d is incomplete, skipping', series_id) + continue + update_queue = list() + for spatch in sdata.get('patches'): + spatch_id = spatch.get('id') + spdata = rm.get_patch(spatch_id) + + rev = None + if spatch_id in revs: + rev = revs[spatch_id] + else: + # try to use the more fuzzy git-patch-id matching + spatch_hash = git_get_patch_id(spdata.get('diff')) + if spatch_hash is not None and spatch_hash in rgithashes: + logger.debug('Matched via git-patch-id') + rev = rgithashes[spatch_hash] + revs[spatch_id] = rev + + if rev is None: + logger.debug('Could not produce precise match for %s', spatch_id) + logger.debug('Will not update series: %s', sdata.get('name')) + update_queue = list() + break + + update_queue.append((spatch.get('name'), spatch_id, to_state, rev)) + + if update_queue: + logger.info('Marking series "%s": %s', to_state, sdata.get('name')) + updated_series.append(sdata) + for name, spatch_id, to_state, rev in update_queue: + count += 1 + done_patches.update([spatch_id]) + if not dryrun: + logger.info(' Updating: %s', name) + rm.update_patch(spatch_id, state=to_state, commit_ref=rev) + else: + logger.info(' Updating (DRYRUN): %s', name) + + if len(updated_series) and 'send_summary' in statemap[refname]: + send_summary(updated_series, to_state, refname, settings, nomail) + if len(updated_series) and 'notify_submitter' in statemap[refname]: + notify_submitters(rm, updated_series, refname, settings, revs, nomail) + + if count: + logger.info('Updated %d patches on %s', count, server) + else: + logger.info('No patches updated on %s', server) + + if not dryrun: + db_save_repo_heads(c, git_heads) + dbconn.commit() + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('-r', '--repository', dest='repo', required=True, + help='Check the repository and auto-accept any applied patches.') + parser.add_argument('-c', '--config', dest='config', nargs='+', default=list(), + help='Use these config values instead of those in the repo config') + parser.add_argument('-l', '--logfile', default=None, + help='Log file for messages during quiet operation') + parser.add_argument('-d', '--dry-run', dest='dryrun', action='store_true', default=False, + help='Do not mail or store anything, just do a dry run.') + parser.add_argument('-n', '--no-mail', dest='nomail', action='store_true', default=False, + help='Do not mail anything, but store database entries.') + parser.add_argument('-q', '--quiet', action='store_true', default=False, + help='Only output errors to the stdout') + parser.add_argument('-v', '--verbose', action='store_true', default=False, + help='Be more verbose in logging output') + + cmdargs = parser.parse_args() + + logger.setLevel(logging.DEBUG) + + if cmdargs.logfile: + ch = logging.FileHandler(cmdargs.logfile) + formatter = logging.Formatter( + '[%(asctime)s] %(message)s') + ch.setFormatter(formatter) + + if cmdargs.verbose: + ch.setLevel(logging.DEBUG) + else: + ch.setLevel(logging.INFO) + logger.addHandler(ch) + + ch = logging.StreamHandler() + formatter = logging.Formatter('%(message)s') + ch.setFormatter(formatter) + + if cmdargs.quiet: + ch.setLevel(logging.CRITICAL) + else: + ch.setLevel(logging.INFO) + + logger.addHandler(ch) + + pwrun(cmdargs.repo, cmdargs.config, cmdargs.nomail, cmdargs.dryrun) |