diff mbox

[Branch,~linaro-image-tools/linaro-image-tools/trunk] Rev 455: Merging in indexer update.

Message ID 20111027171324.10591.2146.launchpad@ackee.canonical.com
State Accepted
Headers show

Commit Message

James Tunnicliffe Oct. 27, 2011, 5:13 p.m. UTC
Merge authors:
  James Tunnicliffe (dooferlad)
  Mattias Backman (mabac)
Related merge proposals:
  https://code.launchpad.net/~dooferlad/linaro-image-tools/indexer_update/+merge/80575
  proposed by: James Tunnicliffe (dooferlad)
  review: Approve - Mattias Backman (mabac)
  review: Resubmit - James Tunnicliffe (dooferlad)
------------------------------------------------------------
revno: 455 [merge]
committer: James Tunnicliffe <james.tunnicliffe@linaro.org>
branch nick: linaro-image-tools
timestamp: Thu 2011-10-27 18:10:51 +0100
message:
  Merging in indexer update.
modified:
  linaro-image-indexer
  linaro_image_tools/fetch_image.py
  linaro_image_tools/fetch_image_settings.yaml


--
lp:linaro-image-tools
https://code.launchpad.net/~linaro-image-tools/linaro-image-tools/trunk

You are subscribed to branch lp:linaro-image-tools.
To unsubscribe from this branch go to https://code.launchpad.net/~linaro-image-tools/linaro-image-tools/trunk/+edit-subscription
diff mbox

Patch

=== modified file 'linaro-image-indexer'
--- linaro-image-indexer	2011-07-26 10:56:17 +0000
+++ linaro-image-indexer	2011-10-27 16:10:01 +0000
@@ -27,51 +27,93 @@ 
 import bz2
 import linaro_image_tools.fetch_image
 
-RELEASES_WWW_DOCUMENT_ROOT  = "/srv/releases.linaro.org/www/platform/"
-RELEASE_URL                 = "http://releases.linaro.org/platform/"
+RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www"
+RELEASE_URL = "http://releases.linaro.org/"
+OLD_RELEASES_WWW_DOCUMENT_ROOT = "/srv/releases.linaro.org/www/platform"
+OLD_RELEASE_URL = "http://releases.linaro.org/platform/"
 SNAPSHOTS_WWW_DOCUMENT_ROOT = "/srv/snapshots.linaro.org/www/"
-SNAPSHOTS_URL               = "http://snapshots.linaro.org/"
+SNAPSHOTS_URL = "http://snapshots.linaro.org/"
 
 class ServerIndexer():
     """Create a database of files on the linaro image servers for use by image
        creation tools."""
     def reset(self):
-        self.url_parse = {}
+        self.url_parse = []
 
     def __init__(self):
         self.reset()
         self.db_file_name = "server_index"
         self.db = linaro_image_tools.fetch_image.DB(self.db_file_name)
 
+    def regexp_list_matches_some(self, to_search, list):
+        assert len(list), "empty list passed"
+
+        for item in list:
+            if re.search(item, to_search):
+                return True
+
+        return False
+
+    def regexp_list_matches_all(self, to_search, list):
+        assert len(list), "empty list passed"
+
+        for item in list:
+            if not re.search(item, to_search):
+                return False
+
+        return True
+
     def crawl(self):
         self.db.set_url_parse_info(self.url_parse)
-        logging.info(self.url_parse.items())
-        
-        for table, info in self.url_parse.items():
-            logging.info(info["base_dir"], ":", info["base_url"], table,
-                         info["url_validator"], info["url_chunks"])
-            self.go(info["base_dir"], info["base_url"], table)
-            logging.info("")
-
-    def go(self, root_dir_, root_url_, table_):
-        for root, subFolders, files in os.walk( root_dir_ ):
-
-            # --- Temporary hack to work around bug:
-            # https://bugs.launchpad.net/linaro-image-tools/+bug/816015
-            # For the moment we just index platform == 11.05-daily when
-            # indexing the snapshots server.
-            if re.search("11.05-daily", root) or re.search("release", table_):
-                for file in files:
-                    if(re.search('\.gz$', file)):
-                        # Construct a URL to the file and save in the database
-                        relative_location = re.sub(root_dir_, "", 
-                                                   os.path.join(root, file))
-                        url = urlparse.urljoin(root_url_, relative_location)
-                        url = urlparse.urljoin(url, file)
-                       
-                        if not re.search('/leb-panda/', url):
-                            logging.info(url)
-                            self.db.record_url(url, table_)
+        logging.getLogger("linaro_image_tools").info(self.url_parse)
+
+        for index in range(len(self.url_parse)):
+            info = self.url_parse[index]
+            table = info["table"]
+            
+            logging.getLogger("linaro_image_tools").info("%s %s %s %s %s" % \
+                (info["base_dir"], info["base_url"], table,
+                info["url_validator"], info["url_chunks"]))
+
+            self.go(info, table, index)
+
+            logging.getLogger("linaro_image_tools").info("")
+
+
+    def go(self, info, table, index):
+        root_url = info["base_url"]
+        root_dir = info["base_dir"]
+
+        for root, subFolders, files in os.walk( root_dir ):
+            for file in files:
+                relative_location = re.sub(root_dir, "",
+                                           os.path.join(root, file))
+                relative_location = relative_location.lstrip("/")
+                
+                to_match = info["url_validator"][0]
+                not_match = info["url_validator"][1]
+
+                url = urlparse.urljoin(root_url, relative_location)
+                url = urlparse.urljoin(url, file)
+
+                to_match_ok = False
+                if len(to_match) == 0:
+                    to_match_ok = True
+                if len(to_match) and self.regexp_list_matches_all(
+                                            relative_location, to_match):
+                    to_match_ok = True
+
+                not_match_ok = True
+                if len(not_match) and self.regexp_list_matches_some(
+                                        relative_location, not_match):
+                    not_match_ok = False
+
+                if( not (to_match_ok and not_match_ok)
+                   or not re.search("\.gz$", file)):
+                    continue  # URL doesn't match the validator. Ignore.
+
+                logging.getLogger("linaro_image_tools").info(url)
+                self.db.record_url(url, index)
                     
         self.dump() 
 
@@ -88,22 +130,25 @@ 
         bz2_db_file.close()
 
     def add_directory_parse_list(self,
-                                 base_dir_,
-                                 base_url_,
-                                 url_validator_,
-                                 id_,
-                                 url_chunks_):
+                                 base_dir,
+                                 base_url,
+                                 url_validator,
+                                 db_columns,
+                                 table,
+                                 url_chunks):
         
-        if(not id_ in self.url_parse):
-            self.url_parse[id_] = {"base_dir":      base_dir_,
-                                   "base_url":      base_url_,
-                                   "url_validator": url_validator_,
-                                   "url_chunks":    url_chunks_}
-            logging.info(self.url_parse[id_]["base_dir"])
+        if not id in self.url_parse:
+            self.url_parse.append({"base_dir": base_dir,
+                                   "base_url": base_url,
+                                   "url_validator": url_validator,
+                                   "db_columns": db_columns,
+                                   "url_chunks": url_chunks,
+                                   "table": table})
+            logging.getLogger("linaro_image_tools").info(base_dir)
 
             # Construct data needed to create the table
             items = []
-            for item in url_chunks_:
+            for item in url_chunks:
                 if(item != ""):
                     # If the entry is a tuple, it indicates it is of the
                     # form name, regexp
@@ -112,7 +157,7 @@ 
                     else:
                         items.append(item)
 
-            self.db.create_table_with_url_text_items(id_, items)
+            self.db.create_table_with_name_columns(table, db_columns)
 
     def clean_removed_urls_from_db(self):
         self.db.clean_removed_urls_from_db()
@@ -120,40 +165,72 @@ 
 if __name__ == '__main__':
     crawler = ServerIndexer()
 
-    # The use of a zero width assertion here to look for links that don't 
-    # contain /hwpacks/ is a bit scary and could be replaced by a tuple of
-    # (False, r"hwpacks"), where the first parameter could indicate that we
-    # want the regexp to fail if we are to use the URL. May be a bit nicer.
-    
-    #http://releases.linaro.org/platform/linaro-m/plasma/final/
-    crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT,
-                                     RELEASE_URL,
-                                     r"^((?!hwpack).)*$",
-                                     "release_binaries",
-                                     ["platform", "image", "build"])
-
-    #http://releases.linaro.org/platform/linaro-m/hwpacks/final/hwpack_linaro-bsp-omap4_20101109-1_armel_unsupported.tar.gz
-    crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT,
-                                     RELEASE_URL,
-                                     r"/hwpacks/",
-                                     "release_hwpacks",
-                                     ["platform", "", "build",
-                                      ("hardware", r"hwpack_linaro-(.*?)_")])
-    
-    #http://snapshots.linaro.org/11.05-daily/linaro-alip/20110420/0/images/tar/
+    ch = logging.StreamHandler()
+    ch.setLevel(logging.CRITICAL)
+    formatter = logging.Formatter("%(message)s")
+    ch.setFormatter(formatter)
+    logger = logging.getLogger("linaro_image_tools")
+    logger.setLevel(logging.CRITICAL)
+    logger.addHandler(ch)
+
+    #linaro-n/ubuntu-desktop/11.09
+    crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT,
+                                     OLD_RELEASE_URL,
+                                     ([], ["platform/", "old/", "hwpack",
+                                           "alpha", "beta", "final", "leb",
+                                           "leb", "release-candidate"]),
+                                     ["platform", "image", "build=final"],
+                                     "release_binaries",
+                                     ["", "image", "platform"])
+
+    #linaro-n/hwpacks/11.09
+    crawler.add_directory_parse_list(OLD_RELEASES_WWW_DOCUMENT_ROOT,
+                                     OLD_RELEASE_URL,
+                                     (["/hwpacks/"],
+                                      ["alpha", "beta", "final", "leb",
+                                       "release-candidate"]),
+                                     ["platform", "hardware", "build=final"],
+                                     "release_hwpacks",
+                                     ["", "", "platform",
+                                      ("hardware", r"hwpack_linaro-(.*?)_")])
+
+    # 11.10/ubuntu/oneiric-images/ubuntu-desktop/
+    # NOT images/...
+    crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT,
+                                     RELEASE_URL,
+                                     (["\d+\.\d+", "ubuntu", "oneiric-images"],
+                                      ["latest/", "platform/", "old/",
+                                       "hwpack", "^images/"]),
+                                     ["platform", "image", "build=final"],
+                                     "release_binaries",
+                                     ["platform", "", "", "image"])
+
+    # 11.10/ubuntu/oneiric-hwpacks/
+    crawler.add_directory_parse_list(RELEASES_WWW_DOCUMENT_ROOT,
+                                     RELEASE_URL,
+                                     (["\d+\.\d+", "ubuntu", "oneiric-hwpacks"],
+                                      ["latest/", "platform/", "old/",
+                                       "^images/"]),
+                                     ["platform", "hardware", "build=final"],
+                                     "release_hwpacks",
+                                     ["platform", "", "",
+                                      ("hardware", r"hwpack_linaro-(.*?)_")])
+
+    #oneiric/linaro-o-alip/20111026/0/images/tar/
     crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT,
                                      SNAPSHOTS_URL,
-                                     r"^((?!hwpack).)*$",
+                                     (["^oneiric/"], ["/hwpack"]),
+                                     ["platform", "image", "date", "build"],
                                      "snapshot_binaries",
                                      ["platform", "image", "date", "build"])
 
-    #http://snapshots.linaro.org/11.05-daily/linaro-hwpacks/omap3/20110420/0/images/hwpack/
+    #oneiric/lt-panda-oneiric/20111026/0/images/hwpack/
     crawler.add_directory_parse_list(SNAPSHOTS_WWW_DOCUMENT_ROOT,
                                      SNAPSHOTS_URL,
-                                     r"/hwpack/",
+                                     (["^oneiric/", "/hwpack"], []),
+                                     ["platform", "hardware", "date", "build"],
                                      "snapshot_hwpacks",
-                                     ["platform", "", "hardware", "date",
-                                      "build"])
+                                     ["platform", "hardware", "date", "build"])
 
     crawler.crawl()
     crawler.clean_removed_urls_from_db()

=== modified file 'linaro_image_tools/fetch_image.py'
--- linaro_image_tools/fetch_image.py	2011-10-20 15:00:06 +0000
+++ linaro_image_tools/fetch_image.py	2011-10-27 14:31:55 +0000
@@ -36,7 +36,6 @@ 
 import threading
 import subprocess
 import utils
-import xdg.BaseDirectory as xdgBaseDir
 
 QEMU = "qemu"
 HARDWARE = "hardware"
@@ -530,6 +529,10 @@ 
     """Downloads files and creates images from them by calling
     linaro-media-create"""
     def __init__(self):
+        # Import xdg lazily so that merely importing this module (as the
+        # server indexer does) works when the xdg package is not installed.
+        import xdg.BaseDirectory as xdgBaseDir
+
         self.datadir = os.path.join(xdgBaseDir.xdg_data_home,
                                      "linaro",
                                      "image-tools",
@@ -916,31 +919,29 @@ 
     def set_url_parse_info(self, url_parse):
         self.url_parse = url_parse
 
-    def record_url(self, url, table):
+    def record_url(self, url, index):
         """Check to see if the record exists in the index, if not, add it"""
 
-        assert self.url_parse[table]["base_url"] != None, ("Can not match the "
+        assert self.url_parse[index]["base_url"], ("Can not match the "
                "URL received (%s) to an entry provided by add_url_parse_list",
                url)
-        assert re.search('^' + self.url_parse[table]["base_url"], url)
-
-        if(not re.search(self.url_parse[table]["url_validator"], url)):
-            #Make sure that the URL matches the validator
-            return
-
-        logging.info("Recording URL", url)
+        assert re.search('^' + self.url_parse[index]["base_url"], url), (
+            "Base url is not part of the url to record.")
+
+        logging.info("Recording URL %s %d", url, index)
 
         assert url not in self.touched_urls, ("URLs expected to only be added "
                                               "to 1 place\n" + url)
 
         self.touched_urls[url] = True
+        table = self.url_parse[index]["table"]
 
         # Do not add the record if it already exists
         self.c.execute("select url from " + table + " where url == ?", (url,))
-        if(self.c.fetchone()):
+        if self.c.fetchone():
             return
 
-        url_match = re.search(self.url_parse[table]["base_url"] + r"(.*)$",
+        url_match = re.search(self.url_parse[index]["base_url"] + r"(.*)$",
                               url)
         url_chunks = url_match.group(1).lstrip('/').encode('ascii').split('/')
         # url_chunks now contains all parts of the url, split on /,
@@ -949,29 +950,39 @@ 
         # We now construct an SQL command to insert the index data into the
         # database using the information we have.
 
-        # Work out how many values we will insert into the database
+        sqlcmd = "INSERT INTO " + table + " ("
         length = 0
-        for name in self.url_parse[table]["url_chunks"]:
-            if(name != ""):
-                length += 1
-
-        sqlcmd = "insert into " + table + " values ("
+        for name in (self.url_parse[index]["url_chunks"] + ["url"]):
+            if name != "":
+                if isinstance(name, tuple):
+                    name = name[0]
+                sqlcmd += name + ", "
+                length += 1
+
+        # Handle fixed value columns
+        for name in self.url_parse[index]["db_columns"]:
+            name_search = re.search("(\w+)=(.*)", name)
+            if name_search:
+                sqlcmd += name_search.group(1) + ", "
+                length += 1
+
+        sqlcmd = sqlcmd.rstrip(", ")  # get rid of unwanted space & comma
+        sqlcmd += ") VALUES ("
 
         # Add the appropriate number of ?s (+1 is so we have room for url)
-        sqlcmd += "".join(["?, " for x in range(length + 1)])
-        sqlcmd = sqlcmd.rstrip(" ")  # get rid of unwanted space
-        sqlcmd = sqlcmd.rstrip(",")  # get rid of unwanted comma
+        sqlcmd += "".join(["?, " for x in range(length)])
+        sqlcmd = sqlcmd.rstrip(", ")  # get rid of unwanted space and comma
         sqlcmd += ")"
 
         # Get the parameters from the URL to record in the SQL database
         sqlparams = []
         chunk_index = 0
-        for name in self.url_parse[table]["url_chunks"]:
+        for name in self.url_parse[index]["url_chunks"]:
             # If this part of the URL isn't a parameter, don't insert it
-            if(name != ""):
+            if name != "":
                 # If the entry is a tuple, it indicates it is of the form
                 # name, regexp
-                if(isinstance(name, tuple)):
+                if isinstance(name, tuple):
                     # use stored regexp to extract data for the database
                     match = re.search(name[1], url_chunks[chunk_index])
                     assert match, ("Unable to match regexp to string ",
@@ -985,6 +996,13 @@ 
 
         sqlparams.append(url)
 
+        # Handle fixed value columns
+        for name in self.url_parse[index]["db_columns"]:
+            name_search = re.search("(\w+)=(.*)", name)
+            if name_search:
+                sqlparams.append(name_search.group(2))
+
+        logging.info("{0}: {1}".format(sqlcmd, sqlparams))
         self.c.execute(sqlcmd, tuple(sqlparams))
 
     def commit(self):
@@ -995,11 +1013,13 @@ 
             self.commit()
             self.database.close()
 
-    def create_table_with_url_text_items(self, table, items):
+    def create_table_with_name_columns(self, table, items):
         cmd = "create table if not exists "
         cmd += table + " ("
 
+        # Handle fixed items (kept in because a field can no longer be derived)
         for item in items:
+            item = re.sub("=.*", "", item)
             cmd += item + " TEXT, "
 
         cmd += "url TEXT)"
@@ -1031,7 +1051,9 @@ 
     def clean_removed_urls_from_db(self):
         self.c = self.database.cursor()
 
-        for table, info in self.url_parse.items():
+        for info in self.url_parse:
+            table = info["table"]
+
             self.c.execute("select url from " + table)
             to_delete = []
 

=== modified file 'linaro_image_tools/fetch_image_settings.yaml'
--- linaro_image_tools/fetch_image_settings.yaml	2011-10-05 15:54:11 +0000
+++ linaro_image_tools/fetch_image_settings.yaml	2011-10-27 14:38:08 +0000
@@ -10,12 +10,21 @@ 
       platform: Which Linaro platform to build an image from. Specify 'snapshot' to
         use a snapshot rather than a release.
     developer: Developer Tools
+    o-developer: Developer Tools
     graphics: Graphics
+    o-graphics: Graphics
     multimedia: Multimedia
+    o-multimedia: Multimedia
     nano: Nano
+    o-nano: Nano
     ubuntu-desktop: 'LEB: Linaro Ubuntu Desktop'
+    o-ubuntu-desktop: 'LEB: Linaro Ubuntu Desktop'
     ubuntu-desktop::long: Linux for humans on low power machines
     ubuntu-desktop::release_note: Shiny!
+    o-alip: ARM Linux Internet Platform
+    alip: ARM Linux Internet Platform
+    server: Server
+    o-server: Server
   hwpack-descriptions:
     omap3: Basic support for Beagle boards
     omap3-x11-base: Includes support for 3D acceleration
@@ -63,9 +72,12 @@ 
     beagle:
     - omap3
     - omap3-x11-base
+    - omap3-oneiric
+    - omap3-x11-base-oneiric
     
     igep:
     - igep
+    - igep-oneiric
     
     panda:
     - panda
@@ -73,30 +85,41 @@ 
     - bsp-omap4
     - lt-panda-x11-base-natty
     - lt-panda
+    - panda-oneiric
+    - panda-x11-base-oneiric
+    - lt-panda-x11-base-oneiric
+    - lt-panda-oneiric
     
     vexpress:
     - vexpress
+    - vexpress-oneiric
+    - lt-vexpress-a9-oneiric
     
     ux500:
     - bsp-ux500
     
     efikamx:
     - efikamx
+    - efikamx-oneiric
     
     efikasb:
     - efikamx
+    - efikamx-oneiric
     
     mx51evk:
     - imx51
+    - imx51-oneiric
     
     mx53loco:
     - lt-mx5
+    - lt-mx5-oneiric
      
     u8500:
      - lt-u8500
      
     overo:
     - overo
+    - overo-oneiric
      
     smdkv310:
      - lt-s5pv310
@@ -104,9 +127,13 @@ 
 
     origen:
      - lt-origen
+     - lt-origen-oneiric
 
     snowball_sd:
      - lt-snowball
+     - lt-snowball-v2
+     - lt-snowball-v3-oneiric
+     - lt-snowball-v2-oneiric
 
   image:
   - alip
@@ -135,6 +162,8 @@ 
     - final
     '11.09':
     - final
+    '11.10':
+    - final
 
 platform: linaro-n
 repository: release