diff mbox series

insane: improve license checksumming logic

Message ID 20190321123042.2947-1-ross.burton@intel.com
State Accepted
Commit 63ef9d342277c4ba541b78cbb45ef181f071f495
Headers show
Series insane: improve license checksumming logic | expand

Commit Message

Ross Burton March 21, 2019, 12:30 p.m. UTC
Instead of opening files as bytes and battling decoding to UTF-8 which can throw
exceptions, open directly as strings and replace invalid codepoints.  This
handles licenses in encodings which are not UTF-8 but are based on ASCII much
better.

Also instead of extracting the license lines, writing them to a file, and then
hashing the file, hash the lines directly.

Signed-off-by: Ross Burton <ross.burton@intel.com>

---
 meta/classes/insane.bbclass | 81 ++++++++++++++++++---------------------------
 1 file changed, 32 insertions(+), 49 deletions(-)

-- 
2.11.0

-- 
_______________________________________________
Openembedded-core mailing list
Openembedded-core@lists.openembedded.org
http://lists.openembedded.org/mailman/listinfo/openembedded-core

Comments

Andre McCurdy Aug. 27, 2019, 8:57 p.m. UTC | #1
On Thu, Mar 21, 2019 at 5:30 AM Ross Burton <ross.burton@intel.com> wrote:
>

> Instead of opening files as bytes and battling decoding to UTF-8 which can throw

> exceptions, open directly as strings and replace invalid codepoints.  This

> handles licenses in encodings which are not UTF-8 but are based on ASCII much

> better.

>

> Also instead of extracting the license lines, writing them to a file, and then

> hashing the file, hash the lines directly.


This change seems to cause a regression in the debug output generated
if LIC_FILES_CHKSUM is wrong. If endline is set incorrectly to include
an empty line beyond the end of the intended license text then that
empty line will no longer be shown between the "vvv" and "^^^" lines
in the debug output... even though the extra line is included in the
md5 calculation.
-- 
_______________________________________________
Openembedded-core mailing list
Openembedded-core@lists.openembedded.org
http://lists.openembedded.org/mailman/listinfo/openembedded-core
diff mbox series

Patch

diff --git a/meta/classes/insane.bbclass b/meta/classes/insane.bbclass
index a2adfee835e..37b8bb0032d 100644
--- a/meta/classes/insane.bbclass
+++ b/meta/classes/insane.bbclass
@@ -458,7 +458,6 @@  python populate_lic_qa_checksum() {
     """
     Check for changes in the license files.
     """
-    import tempfile
     sane = True
 
     lic_files = d.getVar('LIC_FILES_CHKSUM') or ''
@@ -496,61 +495,45 @@  python populate_lic_qa_checksum() {
 
         if (not beginline) and (not endline):
             md5chksum = bb.utils.md5_file(srclicfile)
-            with open(srclicfile, 'rb') as f:
-                license = f.read()
+            with open(srclicfile, 'r', errors='replace') as f:
+                license = f.read().splitlines()
         else:
-            fi = open(srclicfile, 'rb')
-            fo = tempfile.NamedTemporaryFile(mode='wb', prefix='poky.', suffix='.tmp', delete=False)
-            tmplicfile = fo.name;
-            lineno = 0
-            linesout = 0
-            license = []
-            for line in fi:
-                lineno += 1
-                if (lineno >= beginline):
-                    if ((lineno <= endline) or not endline):
-                        fo.write(line)
-                        license.append(line)
-                        linesout += 1
-                    else:
-                        break
-            fo.flush()
-            fo.close()
-            fi.close()
-            md5chksum = bb.utils.md5_file(tmplicfile)
-            license = b''.join(license)
-            os.unlink(tmplicfile)
-
+            with open(srclicfile, 'rb') as f:
+                import hashlib
+                lineno = 0
+                license = []
+                m = hashlib.md5()
+                for line in f:
+                    lineno += 1
+                    if (lineno >= beginline):
+                        if ((lineno <= endline) or not endline):
+                            m.update(line)
+                            license.append(line.decode('utf-8', errors='replace').rstrip())
+                        else:
+                            break
+                md5chksum = m.hexdigest()
         if recipemd5 == md5chksum:
             bb.note (pn + ": md5 checksum matched for ", url)
         else:
             if recipemd5:
                 msg = pn + ": The LIC_FILES_CHKSUM does not match for " + url
                 msg = msg + "\n" + pn + ": The new md5 checksum is " + md5chksum
-                try:
-                    license_lines = license.decode('utf-8').split('\n')
-                except:
-                    # License text might not be valid UTF-8, in which
-                    # case we don't know how to include it in our output
-                    # and have to skip it.
-                    pass
-                else:
-                    max_lines = int(d.getVar('QA_MAX_LICENSE_LINES') or 20)
-                    if not license_lines or license_lines[-1] != '':
-                        # Ensure that our license text ends with a line break
-                        # (will be added with join() below).
-                        license_lines.append('')
-                    remove = len(license_lines) - max_lines
-                    if remove > 0:
-                        start = max_lines // 2
-                        end = start + remove - 1
-                        del license_lines[start:end]
-                        license_lines.insert(start, '...')
-                    msg = msg + "\n" + pn + ": Here is the selected license text:" + \
-                          "\n" + \
-                          "{:v^70}".format(" beginline=%d " % beginline if beginline else "") + \
-                          "\n" + "\n".join(license_lines) + \
-                          "{:^^70}".format(" endline=%d " % endline if endline else "")
+                max_lines = int(d.getVar('QA_MAX_LICENSE_LINES') or 20)
+                if not license or license[-1] != '':
+                    # Ensure that our license text ends with a line break
+                    # (will be added with join() below).
+                    license.append('')
+                remove = len(license) - max_lines
+                if remove > 0:
+                    start = max_lines // 2
+                    end = start + remove - 1
+                    del license[start:end]
+                    license.insert(start, '...')
+                msg = msg + "\n" + pn + ": Here is the selected license text:" + \
+                        "\n" + \
+                        "{:v^70}".format(" beginline=%d " % beginline if beginline else "") + \
+                        "\n" + "\n".join(license) + \
+                        "{:^^70}".format(" endline=%d " % endline if endline else "")
                 if beginline:
                     if endline:
                         srcfiledesc = "%s (lines %d through to %d)" % (srclicfile, beginline, endline)