[RFC,14/16] patman: Support checking for review tags in patchwork

Message ID 20200706034203.2171077-15-sjg@chromium.org
State New
Headers show
Series
  • RFC: patman: Collect review tags and comments from Patchwork
Related show

Commit Message

Simon Glass July 6, 2020, 3:42 a.m.
Before sending out a new version of a series for review, it is important
to add any review tags (e.g. Reviewed-by, Acked-by) collected by
patchwork. Otherwise people waste time reviewing the same patch
repeatedly, become frustrated and stop reviewing your patches.

To help with this, add a new 'status' subcommand that checks patchwork
for review tags, showing those which are not present in the local branch.

This allows users to see what new review tags have been received and then
add them.

Signed-off-by: Simon Glass <sjg at chromium.org>
---

 tools/patman/README     |  34 ++++
 tools/patman/control.py |  31 ++++
 tools/patman/main.py    |  17 ++
 tools/patman/status.py  | 334 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 416 insertions(+)
 create mode 100644 tools/patman/status.py

Patch

diff --git a/tools/patman/README b/tools/patman/README
index 7291e47d0c..a85974740f 100644
--- a/tools/patman/README
+++ b/tools/patman/README
@@ -11,6 +11,8 @@  This tool is a Python script which:
 - Runs the patches through checkpatch.pl and its own checks
 - Optionally emails them out to selected people
 
+It also shows review tags from Patchwork so you can update your local patches.
+
 It is intended to automate patch creation and make it a less
 error-prone process. It is useful for U-Boot and Linux work so far,
 since it uses the checkpatch.pl script.
@@ -345,6 +347,38 @@  These people will get the cover letter even if they are not on the To/Cc
 list for any of the patches.
 
 
+Patchwork Integration
+=====================
+
+Patman has a very basic integration with Patchwork. If you point patman to
+your series on patchwork it can show you what new reviews have appears since
+you sent your series.
+
+To set this up, add a Series-link tag to one of the commits in your series
+(see above).
+
+Then you can type
+
+    patman status
+
+and patman will show you each patch and what review tags have been collected,
+for example:
+
+...
+ 21 x86: mtrr: Update the command to use the new mtrr
+    Reviewed-by: Wolfgang Wallner <wolfgang.wallner at br-automation.com>
+  + Reviewed-by: Bin Meng <bmeng.cn at gmail.com>
+ 22 x86: mtrr: Restructure so command execution is in
+    Reviewed-by: Wolfgang Wallner <wolfgang.wallner at br-automation.com>
+  + Reviewed-by: Bin Meng <bmeng.cn at gmail.com>
+...
+
+This shows that patch 21 and 22 were sent out with one review but have since
+attracted another review each. If the series needs changes, you can update
+these commits with the new review tag before sending the next version of the
+series.
+
+
 Example Work Flow
 =================
 
diff --git a/tools/patman/control.py b/tools/patman/control.py
index e67867b845..3bb3c236e4 100644
--- a/tools/patman/control.py
+++ b/tools/patman/control.py
@@ -173,3 +173,34 @@  def send(args):
             its_a_go, args.ignore_bad_tags, args.add_maintainers,
             args.limit, args.dry_run, args.in_reply_to, args.thread,
             args.smtp_server)
+
+def patchwork_status(branch, count, start, end):
+    """Check the status of patches in patchwork
+
+    This finds the series in patchwork using the Series-link tag, checks for new
+    comments / review tags and displays them
+
+    Args:
+        branch (str): Branch to create patches from (None = current)
+        count (int): Number of patches to produce, or -1 to produce patches for
+            the current branch back to the upstream commit
+        start (int): Start partch to use (0=first / top of branch)
+        end (int): End patch to use (0=last one in series, 1=one before that,
+            etc.)
+
+    Raises:
+        ValueError: if the branch has no Series-link value
+    """
+    if count == -1:
+        # Work out how many patches to send if we can
+        count = (gitutil.CountCommitsToBranch(branch) - start)
+
+    series = patchstream.GetMetaData(branch, start, count - end)
+    link = series.get('link')
+    if not link:
+        raise ValueError("Branch has no Series-link value")
+
+    # Import this here to avoid failing on other commands if the dependencies
+    # are not present
+    from patman import status
+    status.check_patchwork_status(series, link, branch)
diff --git a/tools/patman/main.py b/tools/patman/main.py
index b96000807e..227eb3b228 100755
--- a/tools/patman/main.py
+++ b/tools/patman/main.py
@@ -88,6 +88,10 @@  send.add_argument('patchfiles', nargs='*')
 test_parser = subparsers.add_parser('test', help='Run tests')
 AddCommonArgs(test_parser)
 
+status = subparsers.add_parser('status',
+                               help='Check status of patches in patchwork')
+AddCommonArgs(status)
+
 # Parse options twice: first to get the project and second to handle
 # defaults properly (which depends on project).
 argv = sys.argv[1:]
@@ -147,3 +151,16 @@  elif args.cmd == 'send':
 
     else:
         control.send(args)
+
+# Check status of patches in patchwork
+elif args.cmd == 'status':
+    ret_code = 0
+    try:
+        control.patchwork_status(args.branch, args.count, args.start, args.end)
+    except Exception as e:
+        print('patman: %s: %s' % (type(e).__name__, e))
+        if args.debug:
+            print()
+            traceback.print_exc()
+        ret_code = 1
+    sys.exit(ret_code)
diff --git a/tools/patman/status.py b/tools/patman/status.py
new file mode 100644
index 0000000000..c2e98a5d11
--- /dev/null
+++ b/tools/patman/status.py
@@ -0,0 +1,334 @@ 
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright 2020 Google LLC
+#
+# This talks to the patchwork service to figure out what patches have been
+# reviewed and commented on.
+
+from bs4 import BeautifulSoup
+import collections
+import concurrent.futures
+import io
+import pygit2
+from itertools import repeat
+import re
+import requests
+import time
+from urllib.parse import urljoin
+
+from patman import commit
+from patman import gitutil
+from patman import patchstream
+from patman import terminal
+from patman.terminal import Color
+
+# Patches which are part of a multi-patch series are shown with a prefix like
+# [prefix, version, sequence], for example '[RFC, v2, 3/5]'. All but the last
+# part is optional. This decodes the string into groups. For single patches
+# the [] part is not present:
+# Groups: (ignore, ignore, ignore, prefix, version, sequence, subject)
+re_patch = re.compile('(\[(((.*),)?(.*),)?(.*)\]\s)?(.*)$')
+
+# This decodes the sequence string into a patch number and patch count
+re_seq = re.compile('(\d+)/(\d+)')
+
+def to_int(vals):
+    """Convert a lsit of strings into integers, using 0 if not an integer
+
+    Args:
+        vals: List of strings
+
+    Returns:
+        List of integers, one for each input string
+    """
+    out = [int(val) if val.isdigit() else 0 for val in vals]
+    return out
+
+
+class Patch(dict):
+    """Models a patch in patchwork
+
+    This class records information obtained from patchwork
+
+    Some of this information comes from the 'Patch' column:
+
+        [RFC,v2,1/3] dm: Driver and uclass changes for tiny-dm
+
+    This shows the prefix, version, seq, count and subject.
+
+    The other properties come from other columns in the display.
+
+    Properties:
+        seq: Sequence number within series (1=first) parsed from sequence string
+        count: Number of patches in series, parsed from sequence string
+        prefix: Prefix string or None
+        subject: Patch subject with [..] part removed (same as commit subject)
+        series: Series name
+        acked: Number of Acked-by rtags
+        fixes: Number of Fixes rtags
+        reviewed: Number of Reviewed-by rtags
+        tested: Number of Tested-by rtags
+        success: Success value
+        warning: Warning value
+        fail: Failure value
+        date: Date as a string
+        Submitter: Submitter name
+        Delegate: Deleted ID
+        state: Current state (e.g. 'New')
+        url: URL pointing to the patch on patchwork
+    """
+    def __init__(self):
+        self.seq = None
+        self.count = None
+        self.prefix = None
+        self.subject = None
+        self.series = None
+        self.acked = None
+        self.fixes = None
+        self.reviewed = None
+        self.tested = None
+        self.success = None
+        self.warning = None
+        self.fail = None
+        self.date = None
+        self.submitter = None
+        self.delegate = None
+        self.state = None
+        self.url = None
+
+    # These make us more like a dictionary
+    def __setattr__(self, name, value):
+        self[name] = value
+
+    def __getattr__(self, name):
+        return self[name]
+
+    def Add(self, name, value):
+        """Add information obtained from the web page
+
+        This sets the object properties based on @name.
+
+        Args:
+            name: Column name in lower-case, e.g. 'Series' or 'State'
+            value: Text obtained for this patch from that column, e.g. 'New'
+        """
+        if name in self:
+            self[name] = value
+        elif name == 'patch':
+            mat = re_patch.search(value.strip())
+            if not mat:
+                raise ValueError("Cannot parse subject '%s'" % value)
+            self.prefix = mat.group(1)
+            self.prefix, self.version, seq_info, self.subject = mat.groups()[3:]
+            mat_seq = re_seq.match(seq_info)
+            if not mat_seq:
+                raise ValueError("Cannot parse sequence spec '%s'" % seq_info)
+            self.seq = int(mat_seq.group(1))
+            self.count = int(mat_seq.group(2))
+        elif name == 'a/f/r/t':
+            self.acked, self.fixes, self.reviewed, self.tested = to_int(
+                value.split())
+        elif name == 's/w/f':
+            self.success, self.warning, self.fail = to_int(value)
+        else:
+            print('Unknown field: %s\n' % name)
+
+    def SetUrl(self, url):
+        """Sets the URL for a patch
+
+        Args:
+            url: URL to set
+        """
+        self.url = url
+
+
+def FindResponses(url):
+    """Collect the response tags (rtags) to a patch found in patchwork
+
+    Reads the URL and checks each comment for an rtags (e.g. Reviewed-by).
+    This does not look at the original patch (which have have rtags picked
+    up from previous rounds of reviews). It only looks at comments which add
+    new rtags.
+
+    Args:
+        url: URL of the patch
+
+    Returns:
+        Dict of rtags:
+            key: rtag type, e.g. 'Acked-by'
+            value: Set of people providing that rtag. Each member of the set
+                is a name/email, e.g. 'Bin Meng <bmeng.cn at gmail.com>'
+    """
+    web_data = requests.get(url)
+    if web_data.status_code != 200:
+        raise ValueError("Could not read URL '%s'" % url)
+    soup = BeautifulSoup(web_data.text, 'html.parser')
+
+    # The first comment is the patch itself, so skip that
+    comments = soup.findAll(class_='comment')[1:]
+    rtags = collections.defaultdict(set)
+    reviews = []
+
+    if comments:
+        for comment in comments:
+            ps = patchstream.PatchStream(None)
+            ps.commit = commit.Commit(None)
+            meta = comment.find(class_='meta')
+            content = comment.find(class_='content')
+            infd = io.StringIO(content.get_text())
+            outfd = io.StringIO()
+            ps.ProcessStream(infd, outfd)
+            for response, people in ps.commit.rtags.items():
+                rtags[response].update(people)
+    return rtags
+
+def CollectPatches(series, url):
+    """Collect patch information about a series from patchwork
+
+    Reads the web page and collects information provided by patchwork about
+    the status of each patch.
+
+    Args:
+        series: Series object corresponding to the local branch containing the
+            series
+        url: URL pointing to patchwork's view of this series, possibly an
+            earlier versions of it
+
+    Returns:
+        List of patches sorted by sequence number, each a Patch object
+    """
+    response = requests.get(url)
+    if response.status_code != 200:
+        raise ValueError("Could not read URL '%s'" % url)
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    # Find the main table
+    tables = soup.findAll('table')
+    if len(tables) != 1:
+        raise ValueError('Failed to parse page (expected only one table)')
+    tab = tables[0]
+
+    # Get all the rows, which are patches
+    rows = tab.tbody.findAll('tr')
+    count = len(rows)
+    num_commits = len(series.commits)
+    if count != num_commits:
+        print('Warning: Web page reports %d patches, series has %d' %
+              (count, num_commits))
+
+    # Get the column headers, which we use to figure out which column is which
+    hdrs = tab.thead.findAll('th')
+    cols = []
+    for hdr in hdrs:
+        cols.append(hdr.get_text().strip())
+    patches = []
+
+    # Work through each row (patch) one at a time, collecting the information
+    for row in rows:
+        patch = Patch()
+        for seq, col in enumerate(row.findAll('td')):
+            name = cols[seq].lower()
+            patch.Add(name, col.get_text())
+            if name == 'patch':
+                patch.SetUrl(urljoin(url, col.a['href']))
+        if patch.count != count:
+            raise ValueError("Patch %d '%s' has count of %d, expected %d" %
+                (patch.seq, patch.subject, patch.count, num_commits))
+        patches.append(patch)
+
+    # Sort patches by patch number
+    patches = sorted(patches, key=lambda x: x.seq)
+    return patches
+
+def FindNewResponses(new_rtag_list, seq, commit, patch):
+    """Find new rtags collected by patchwork that we don't know about
+
+    This is designed to be run in parallel, once for each commit/patch
+
+    Args:
+        new_rtag_list: New rtags are written to new_rtag_list[seq]
+            list, each a dict:
+                key: Response tag (e.g. 'Reviewed-by')
+                value: Set of people who gave that response, each a name/email
+                    string
+        seq: Position in new_rtag_list to update
+        commit: Commit object for this commit
+        patch: Corresponding Patch object for this patch
+    """
+    acked = commit.rtags['Acked-by']
+    reviewed = commit.rtags['Reviewed-by']
+    tested = commit.rtags['Tested-by']
+    fixes = commit.rtags['Fixes']
+
+    # Figure out the number of new rtags by subtracting what we have in
+    # the commit from what patchwork's summary reports.
+    extra = 0
+    extra += patch.acked - len(acked)
+    extra += patch.fixes - len(fixes)
+    extra += patch.reviewed - len(reviewed)
+    extra += patch.tested - len(tested)
+
+    # If patchwork agrees with the commit, we don't need to read the web
+    # page, so save some time. If not, we need to add the new ones to the commit
+    new_rtags = collections.defaultdict(set)
+    if extra:
+        pw_rtags = FindResponses(patch.url)
+        base_rtags = commit.rtags
+        new_count = 0
+        for tag, people in pw_rtags.items():
+            for who in people:
+                is_new = (tag not in base_rtags or
+                          who not in base_rtags[tag])
+                if is_new:
+                    new_rtags[tag].add(who)
+                    new_count += 1
+    new_rtag_list[seq] = new_rtags
+
+def ShowResponses(rtags, indent, is_new):
+    """Show rtags collected
+
+    Args:
+        rtags: dict:
+            key: Response tag (e.g. 'Reviewed-by')
+            value: Set of people who gave that response, each a name/email string
+        indent: Indentation string to write before each line
+        is_new: True if this output should be highlighted
+    """
+    col = terminal.Color()
+    count = 0
+    for tag, people in rtags.items():
+        for who in people:
+            terminal.Print(indent + '%s %s: ' % ('+' if is_new else ' ', tag),
+                           newline=False, colour=col.GREEN, bright=is_new)
+            terminal.Print(who, colour=col.WHITE, bright=is_new)
+            count += 1
+    return count
+
+def check_patchwork_status(series, url, branch):
+    patches = CollectPatches(series, url)
+    col = terminal.Color()
+    count = len(patches)
+    new_rtag_list = [None] * count
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
+        futures = executor.map(
+            FindNewResponses, repeat(new_rtag_list), range(count),
+            series.commits, patches)
+    for fresponse in futures:
+        if fresponse:
+            raise fresponse.exception()
+
+    num_to_add = 0
+    for seq, patch in enumerate(patches):
+        terminal.Print('%3d %s' % (patch.seq, patch.subject[:50]),
+                       colour=col.BLUE)
+        commit = series.commits[seq]
+        base_rtags = commit.rtags
+        new_rtags = new_rtag_list[seq]
+
+        indent = ' ' * 2
+        ShowResponses(base_rtags, indent, False)
+        num_to_add += ShowResponses(new_rtags, indent, True)
+
+    print("%d new response%s available in patchwork" %
+          (num_to_add, 's' if num_to_add != 1 else ''))