distro_check: partial rewrite to make it work again

Message ID 1478255821-5289-1-git-send-email-ross.burton@intel.com
State Accepted
Commit 58de12eaaac9c60bb8fc84a3a965ef86d2a39ae0
Headers show

Commit Message

Ross Burton Nov. 4, 2016, 10:37 a.m.
This library suffered as part of the Python 2 to Python 3 migration and stopped
working entirely.

Fix all the migration problems such as files being treated as strings but opened
in binary mode, insufficient use of with on files, and so on.

Rewrite large amounts to be Pythonic instead of C-in-Python.

Update OpenSuse and Fedora URLs.

Fedora now splits the archive alphabetically so handle that.

[ YOCTO #10562 ]

Signed-off-by: Ross Burton <ross.burton@intel.com>

---
 meta/lib/oe/distro_check.py | 281 +++++++++++++++++---------------------------
 1 file changed, 110 insertions(+), 171 deletions(-)

-- 
2.8.1

-- 
_______________________________________________
Openembedded-core mailing list
Openembedded-core@lists.openembedded.org
http://lists.openembedded.org/mailman/listinfo/openembedded-core

Patch

diff --git a/meta/lib/oe/distro_check.py b/meta/lib/oe/distro_check.py
index 87c52fa..00c827e 100644
--- a/meta/lib/oe/distro_check.py
+++ b/meta/lib/oe/distro_check.py
@@ -1,32 +1,17 @@ 
-from contextlib import contextmanager
-
-from bb.utils import export_proxies
-
 def create_socket(url, d):
     import urllib
+    from bb.utils import export_proxies
 
-    socket = None
-    try:
-        export_proxies(d)
-        socket = urllib.request.urlopen(url)
-    except:
-        bb.warn("distro_check: create_socket url %s can't access" % url)
-
-    return socket
+    export_proxies(d)
+    return urllib.request.urlopen(url)
 
 def get_links_from_url(url, d):
     "Return all the href links found on the web location"
 
     from bs4 import BeautifulSoup, SoupStrainer
 
+    soup = BeautifulSoup(create_socket(url,d), "html.parser", parse_only=SoupStrainer("a"))
     hyperlinks = []
-
-    webpage = ''
-    sock = create_socket(url,d)
-    if sock:
-        webpage = sock.read()
-
-    soup = BeautifulSoup(webpage, "html.parser", parse_only=SoupStrainer("a"))
     for line in soup.find_all('a', href=True):
         hyperlinks.append(line['href'].strip('/'))
     return hyperlinks
@@ -37,6 +22,7 @@  def find_latest_numeric_release(url, d):
     maxstr=""
     for link in get_links_from_url(url, d):
         try:
+            # TODO use LooseVersion
             release = float(link)
         except:
             release = 0
@@ -47,144 +33,116 @@  def find_latest_numeric_release(url, d):
 
 def is_src_rpm(name):
     "Check if the link is pointing to a src.rpm file"
-    if name[-8:] == ".src.rpm":
-        return True
-    else:
-        return False
+    return name.endswith(".src.rpm")
 
 def package_name_from_srpm(srpm):
     "Strip out the package name from the src.rpm filename"
-    strings = srpm.split('-')
-    package_name = strings[0]
-    for i in range(1, len (strings) - 1):
-        str = strings[i]
-        if not str[0].isdigit():
-            package_name += '-' + str
-    return package_name
-
-def clean_package_list(package_list):
-    "Removes multiple entries of packages and sorts the list"
-    set = {}
-    map(set.__setitem__, package_list, [])
-    return set.keys()
 
+    # ca-certificates-2016.2.7-1.0.fc24.src.rpm
+    # ^name           ^ver     ^release^removed
+    (name, version, release) = srpm.replace(".src.rpm", "").rsplit("-", 2)
+    return name
 
 def get_latest_released_meego_source_package_list(d):
     "Returns list of all the name os packages in the latest meego distro"
 
-    package_names = []
-    try:
-        f = open("/tmp/Meego-1.1", "r")
+    package_names = set()
+    with open("/tmp/Meego-1.1", "r") as f:
         for line in f:
-            package_names.append(line[:-1] + ":" + "main") # Also strip the '\n' at the end
-    except IOError: pass
-    package_list=clean_package_list(package_names)
-    return "1.0", package_list
+            package_names.add(line.strip() + ":" + "main")
+    return "1.1", package_names
 
 def get_source_package_list_from_url(url, section, d):
     "Return a sectioned list of package names from a URL list"
 
     bb.note("Reading %s: %s" % (url, section))
     links = get_links_from_url(url, d)
-    srpms = list(filter(is_src_rpm, links))
-    names_list = list(map(package_name_from_srpm, srpms))
+    srpms = filter(is_src_rpm, links)
+    names_list = map(package_name_from_srpm, srpms)
 
-    new_pkgs = []
+    new_pkgs = set()
     for pkgs in names_list:
-       new_pkgs.append(pkgs + ":" + section)
-
+       new_pkgs.add(pkgs + ":" + section)
     return new_pkgs
 
+def get_source_package_list_from_url_by_letter(url, section, d):
+    import string
+    from urllib.error import HTTPError
+    packages = set()
+    for letter in (string.ascii_lowercase + string.digits):
+        # Not all subfolders may exist, so silently handle 404
+        try:
+            packages |= get_source_package_list_from_url(url + "/" + letter, section, d)
+        except HTTPError as e:
+            if e.code != 404: raise
+    return packages
+
 def get_latest_released_fedora_source_package_list(d):
     "Returns list of all the name os packages in the latest fedora distro"
     latest = find_latest_numeric_release("http://archive.fedoraproject.org/pub/fedora/linux/releases/", d)
-
-    package_names = get_source_package_list_from_url("http://archive.fedoraproject.org/pub/fedora/linux/releases/%s/Fedora/source/SRPMS/" % latest, "main", d)
-
-#    package_names += get_source_package_list_from_url("http://download.fedora.redhat.com/pub/fedora/linux/releases/%s/Everything/source/SPRMS/" % latest, "everything")
-    package_names += get_source_package_list_from_url("http://archive.fedoraproject.org/pub/fedora/linux/updates/%s/SRPMS/" % latest, "updates", d)
-
-    package_list=clean_package_list(package_names)
-        
-    return latest, package_list
+    package_names = get_source_package_list_from_url_by_letter("http://archive.fedoraproject.org/pub/fedora/linux/releases/%s/Everything/source/tree/Packages/" % latest, "main", d)
+    package_names |= get_source_package_list_from_url_by_letter("http://archive.fedoraproject.org/pub/fedora/linux/updates/%s/SRPMS/" % latest, "updates", d)
+    return latest, package_names
 
 def get_latest_released_opensuse_source_package_list(d):
     "Returns list of all the name os packages in the latest opensuse distro"
     latest = find_latest_numeric_release("http://download.opensuse.org/source/distribution/",d)
 
     package_names = get_source_package_list_from_url("http://download.opensuse.org/source/distribution/%s/repo/oss/suse/src/" % latest, "main", d)
-    package_names += get_source_package_list_from_url("http://download.opensuse.org/update/%s/rpm/src/" % latest, "updates", d)
-
-    package_list=clean_package_list(package_names)
-    return latest, package_list
+    package_names |= get_source_package_list_from_url("http://download.opensuse.org/update/%s/src/" % latest, "updates", d)
+    return latest, package_names
 
 def get_latest_released_mandriva_source_package_list(d):
     "Returns list of all the name os packages in the latest mandriva distro"
     latest = find_latest_numeric_release("http://distrib-coffee.ipsl.jussieu.fr/pub/linux/MandrivaLinux/official/", d)
     package_names = get_source_package_list_from_url("http://distrib-coffee.ipsl.jussieu.fr/pub/linux/MandrivaLinux/official/%s/SRPMS/main/release/" % latest, "main", d)
-#    package_names += get_source_package_list_from_url("http://distrib-coffee.ipsl.jussieu.fr/pub/linux/MandrivaLinux/official/%s/SRPMS/contrib/release/" % latest, "contrib")
-    package_names += get_source_package_list_from_url("http://distrib-coffee.ipsl.jussieu.fr/pub/linux/MandrivaLinux/official/%s/SRPMS/main/updates/" % latest, "updates", d)
-
-    package_list=clean_package_list(package_names)
-    return latest, package_list
+    package_names |= get_source_package_list_from_url("http://distrib-coffee.ipsl.jussieu.fr/pub/linux/MandrivaLinux/official/%s/SRPMS/main/updates/" % latest, "updates", d)
+    return latest, package_names
 
 def find_latest_debian_release(url, d):
     "Find the latest listed debian release on the given url"
 
-    releases = []
-    for link in get_links_from_url(url, d):
-        if link[:6] == "Debian":
-            if ';' not in link:
-                releases.append(link)
+    releases = [link.replace("Debian", "")
+                for link in get_links_from_url(url, d)
+                if link.startswith("Debian")]
     releases.sort()
     try:
-        return releases.pop()[6:]
+        return releases[-1]
     except:
         return "_NotFound_"
 
 def get_debian_style_source_package_list(url, section, d):
     "Return the list of package-names stored in the debian style Sources.gz file"
-    import tempfile
     import gzip
 
-    webpage = ''
-    sock = create_socket(url,d)
-    if sock:
-        webpage = sock.read()
-
-    tmpfile = tempfile.NamedTemporaryFile(mode='wb', prefix='oecore.', suffix='.tmp', delete=False)
-    tmpfilename=tmpfile.name
-    tmpfile.write(sock.read())
-    tmpfile.close()
-    bb.note("Reading %s: %s" % (url, section))
-
-    f = gzip.open(tmpfilename)
-    package_names = []
-    for line in f:
-        if line[:9] == "Package: ":
-            package_names.append(line[9:-1] + ":" + section) # Also strip the '\n' at the end
-    os.unlink(tmpfilename)
-
+    package_names = set()
+    for line in gzip.open(create_socket(url, d), mode="rt"):
+        if line.startswith("Package:"):
+            pkg = line.split(":", 1)[1].strip()
+            package_names.add(pkg + ":" + section)
     return package_names
 
 def get_latest_released_debian_source_package_list(d):
-    "Returns list of all the name os packages in the latest debian distro"
+    "Returns list of all the name of packages in the latest debian distro"
     latest = find_latest_debian_release("http://ftp.debian.org/debian/dists/", d)
-    url = "http://ftp.debian.org/debian/dists/stable/main/source/Sources.gz" 
+    url = "http://ftp.debian.org/debian/dists/stable/main/source/Sources.gz"
     package_names = get_debian_style_source_package_list(url, "main", d)
-#    url = "http://ftp.debian.org/debian/dists/stable/contrib/source/Sources.gz" 
-#    package_names += get_debian_style_source_package_list(url, "contrib")
-    url = "http://ftp.debian.org/debian/dists/stable-proposed-updates/main/source/Sources.gz" 
-    package_names += get_debian_style_source_package_list(url, "updates", d)
-    package_list=clean_package_list(package_names)
-    return latest, package_list
+    url = "http://ftp.debian.org/debian/dists/stable-proposed-updates/main/source/Sources.gz"
+    package_names |= get_debian_style_source_package_list(url, "updates", d)
+    return latest, package_names
 
 def find_latest_ubuntu_release(url, d):
-    "Find the latest listed ubuntu release on the given url"
+    """
+    Find the latest listed Ubuntu release on the given ubuntu/dists/ URL.
+
+    To avoid matching development releases look for distributions that have
+    updates, so the resulting distro could be any supported release.
+    """
     url += "?C=M;O=D" # Descending Sort by Last Modified
     for link in get_links_from_url(url, d):
-        if link[-8:] == "-updates":
-            return link[:-8]
+        if "-updates" in link:
+            distro = link.replace("-updates", "")
+            return distro
     return "_NotFound_"
 
 def get_latest_released_ubuntu_source_package_list(d):
@@ -192,52 +150,45 @@  def get_latest_released_ubuntu_source_package_list(d):
     latest = find_latest_ubuntu_release("http://archive.ubuntu.com/ubuntu/dists/", d)
     url = "http://archive.ubuntu.com/ubuntu/dists/%s/main/source/Sources.gz" % latest
     package_names = get_debian_style_source_package_list(url, "main", d)
-#    url = "http://archive.ubuntu.com/ubuntu/dists/%s/multiverse/source/Sources.gz" % latest
-#    package_names += get_debian_style_source_package_list(url, "multiverse")
-#    url = "http://archive.ubuntu.com/ubuntu/dists/%s/universe/source/Sources.gz" % latest
-#    package_names += get_debian_style_source_package_list(url, "universe")
     url = "http://archive.ubuntu.com/ubuntu/dists/%s-updates/main/source/Sources.gz" % latest
-    package_names += get_debian_style_source_package_list(url, "updates", d)
-    package_list=clean_package_list(package_names)
-    return latest, package_list
+    package_names |= get_debian_style_source_package_list(url, "updates", d)
+    return latest, package_names
 
 def create_distro_packages_list(distro_check_dir, d):
+    import shutil
+
     pkglst_dir = os.path.join(distro_check_dir, "package_lists")
-    if not os.path.isdir (pkglst_dir):
-        os.makedirs(pkglst_dir)
-    # first clear old stuff
-    for file in os.listdir(pkglst_dir):
-        os.unlink(os.path.join(pkglst_dir, file))
- 
-    per_distro_functions = [
-                            ["Debian", get_latest_released_debian_source_package_list],
-                            ["Ubuntu", get_latest_released_ubuntu_source_package_list],
-                            ["Fedora", get_latest_released_fedora_source_package_list],
-                            ["OpenSuSE", get_latest_released_opensuse_source_package_list],
-                            ["Mandriva", get_latest_released_mandriva_source_package_list],
-                            ["Meego", get_latest_released_meego_source_package_list]
-                           ]
- 
-    from datetime import datetime
-    begin = datetime.now()
-    for distro in per_distro_functions:
-        name = distro[0]
-        release, package_list = distro[1](d)
+    bb.utils.remove(pkglst_dir, True)
+    bb.utils.mkdirhier(pkglst_dir)
+
+    per_distro_functions = (
+                            ("Debian", get_latest_released_debian_source_package_list),
+                            ("Ubuntu", get_latest_released_ubuntu_source_package_list),
+                            ("Fedora", get_latest_released_fedora_source_package_list),
+                            ("OpenSuSE", get_latest_released_opensuse_source_package_list),
+                            ("Mandriva", get_latest_released_mandriva_source_package_list),
+                            ("Meego", get_latest_released_meego_source_package_list)
+                           )
+
+    for name, fetcher_func in per_distro_functions:
+        try:
+            release, package_list = fetcher_func(d)
+        except Exception as e:
+            bb.warn("Cannot fetch packages for %s: %s" % (name, e))
         bb.note("Distro: %s, Latest Release: %s, # src packages: %d" % (name, release, len(package_list)))
+        if len(package_list) == 0:
+            bb.error("Didn't fetch any packages for %s %s" % (name, release))
+
         package_list_file = os.path.join(pkglst_dir, name + "-" + release)
-        f = open(package_list_file, "w+b")
-        for pkg in package_list:
-            f.write(pkg + "\n")
-        f.close()
-    end = datetime.now()
-    delta = end - begin
-    bb.note("package_list generatiosn took this much time: %d seconds" % delta.seconds)
+        with open(package_list_file, 'w') as f:
+            for pkg in sorted(package_list):
+                f.write(pkg + "\n")
 
 def update_distro_data(distro_check_dir, datetime, d):
     """
-        If distro packages list data is old then rebuild it.
-        The operations has to be protected by a lock so that
-        only one thread performes it at a time.
+    If distro packages list data is old then rebuild it.
+    The operations has to be protected by a lock so that
+    only one thread performes it at a time.
     """
     if not os.path.isdir (distro_check_dir):
         try:
@@ -264,25 +215,22 @@  def update_distro_data(distro_check_dir, datetime, d):
             f.seek(0)
             f.write(datetime)
 
-    except OSError:
-        raise Exception('Unable to read/write this file: %s' % (datetime_file))
+    except OSError as e:
+        raise Exception('Unable to open timestamp: %s' % e)
     finally:
         fcntl.lockf(f, fcntl.LOCK_UN)
         f.close()
- 
+
 def compare_in_distro_packages_list(distro_check_dir, d):
     if not os.path.isdir(distro_check_dir):
         raise Exception("compare_in_distro_packages_list: invalid distro_check_dir passed")
-        
+
     localdata = bb.data.createCopy(d)
     pkglst_dir = os.path.join(distro_check_dir, "package_lists")
     matching_distros = []
-    pn = d.getVar('PN', True)
-    recipe_name = d.getVar('PN', True)
+    pn = recipe_name = d.getVar('PN', True)
     bb.note("Checking: %s" % pn)
 
-    trim_dict = dict({"-native":"-native", "-cross":"-cross", "-initial":"-initial"})
-
     if pn.find("-native") != -1:
         pnstripped = pn.split("-native")
         localdata.setVar('OVERRIDES', "pn-" + pnstripped[0] + ":" + d.getVar('OVERRIDES', True))
@@ -308,27 +256,22 @@  def compare_in_distro_packages_list(distro_check_dir, d):
         recipe_name = pnstripped[0]
 
     bb.note("Recipe: %s" % recipe_name)
-    tmp = localdata.getVar('DISTRO_PN_ALIAS', True)
 
     distro_exceptions = dict({"OE-Core":'OE-Core', "OpenedHand":'OpenedHand', "Intel":'Intel', "Upstream":'Upstream', "Windriver":'Windriver', "OSPDT":'OSPDT Approved', "Poky":'poky'})
-
-    if tmp:
-        list = tmp.split(' ')
-        for str in list:
-            if str and str.find("=") == -1 and distro_exceptions[str]:
-                matching_distros.append(str)
+    tmp = localdata.getVar('DISTRO_PN_ALIAS', True) or ""
+    for str in tmp.split():
+        if str and str.find("=") == -1 and distro_exceptions[str]:
+            matching_distros.append(str)
 
     distro_pn_aliases = {}
-    if tmp:
-        list = tmp.split(' ')
-        for str in list:
-            if str.find("=") != -1:
-                (dist, pn_alias) = str.split('=')
-                distro_pn_aliases[dist.strip().lower()] = pn_alias.strip()
- 
+    for str in tmp.split():
+        if "=" in str:
+            (dist, pn_alias) = str.split('=')
+            distro_pn_aliases[dist.strip().lower()] = pn_alias.strip()
+
     for file in os.listdir(pkglst_dir):
         (distro, distro_release) = file.split("-")
-        f = open(os.path.join(pkglst_dir, file), "rb")
+        f = open(os.path.join(pkglst_dir, file), "r")
         for line in f:
             (pkg, section) = line.split(":")
             if distro.lower() in distro_pn_aliases:
@@ -341,16 +284,12 @@  def compare_in_distro_packages_list(distro_check_dir, d):
                 break
         f.close()
 
-    
-    if tmp != None:
-        list = tmp.split(' ')
-        for item in list:
-            matching_distros.append(item)
+    for item in tmp.split():
+        matching_distros.append(item)
     bb.note("Matching: %s" % matching_distros)
     return matching_distros
 
 def create_log_file(d, logname):
-    import subprocess
     logpath = d.getVar('LOG_DIR', True)
     bb.utils.mkdirhier(logpath)
     logfn, logsuffix = os.path.splitext(logname)
@@ -359,7 +298,7 @@  def create_log_file(d, logname):
             slogfile = os.path.join(logpath, logname)
             if os.path.exists(slogfile):
                     os.remove(slogfile)
-            subprocess.call("touch %s" % logfile, shell=True)
+            open(logfile, 'w+').close()
             os.symlink(logfile, slogfile)
             d.setVar('LOG_FILE', logfile)
     return logfile
@@ -371,8 +310,8 @@  def save_distro_check_result(result, datetime, result_file, d):
     if not logdir:
         bb.error("LOG_DIR variable is not defined, can't write the distro_check results")
         return
-    if not os.path.isdir(logdir):
-        os.makedirs(logdir)
+    bb.utils.mkdirhier(logdir)
+
     line = pn
     for i in result:
         line = line + "," + i