[Branch,~linaro-validation/lava-dispatcher/trunk] Rev 77: improve error handling

Message ID 20110721024715.6703.59285.launchpad@loganberry.canonical.com
State Accepted
Headers show

Commit Message

Paul Larson July 21, 2011, 2:47 a.m.
Merge authors:
  Spring Zhang (qzhang)
Related merge proposals:
  https://code.launchpad.net/~qzhang/lava-dispatcher/simple-error-handler/+merge/64054
  proposed by: Spring Zhang (qzhang)
  review: Needs Fixing - Paul Larson (pwlars)
  review: Resubmit - Spring Zhang (qzhang)
------------------------------------------------------------
revno: 77 [merge]
committer: Paul Larson <paul.larson@canonical.com>
branch nick: lava-dispatcher
timestamp: Wed 2011-07-20 19:13:21 -0500
message:
  improve error handling
modified:
  lava_dispatcher/__init__.py
  lava_dispatcher/actions/android_0xbench.py
  lava_dispatcher/actions/android_deploy.py
  lava_dispatcher/actions/deploy.py
  lava_dispatcher/actions/lava-test.py
  lava_dispatcher/android_client.py
  lava_dispatcher/client.py


--
lp:lava-dispatcher
https://code.launchpad.net/~linaro-validation/lava-dispatcher/trunk

You are subscribed to branch lp:lava-dispatcher.
To unsubscribe from this branch go to https://code.launchpad.net/~linaro-validation/lava-dispatcher/trunk/+edit-subscription

Patch

=== modified file 'lava_dispatcher/__init__.py'
--- lava_dispatcher/__init__.py	2011-06-27 04:55:08 +0000
+++ lava_dispatcher/__init__.py	2011-07-20 07:30:07 +0000
@@ -18,13 +18,17 @@ 
 # along
 # with this program; if not, see <http://www.gnu.org/licenses>.
 
+import sys
 from datetime import datetime
 import json
+import traceback
+from uuid import uuid1
+import base64
+import pexpect
+
 from lava_dispatcher.actions import get_all_cmds
-from lava_dispatcher.client import LavaClient
+from lava_dispatcher.client import LavaClient, CriticalError, GeneralError
 from lava_dispatcher.android_client import LavaAndroidClient
-from uuid import uuid1
-import base64
 
 class LavaTestJob(object):
     def __init__(self, job_json):
@@ -59,18 +63,41 @@ 
                 metadata['target.hostname'] = self.target
                 self.context.test_data.add_metadata(metadata)
                 action = lava_commands[cmd['command']](self.context)
-                action.run(**params)
+                try:
+                    status = 'fail'
+                    action.run(**params)
+                except CriticalError, err:
+                    raise err
+                except (pexpect.TIMEOUT, GeneralError), err:
+                    pass
+                except Exception, err:
+                    raise
+                else:
+                    status = 'pass'
+                finally:
+                    if status == 'fail':
+                        err_msg = "Lava failed at action " + cmd['command'] \
+                            + " with error: " + str(err) + "\n"
+                        if cmd['command'] == 'lava_test_run':
+                            err_msg = err_msg + "Lava failed with test: " \
+                                + test_name
+                        exc_type, exc_value, exc_traceback = sys.exc_info()
+                        err_msg = err_msg + repr(traceback.format_tb(exc_traceback))
+                        print >> sys.stderr, err_msg
+                    else:
+                        err_msg = ""
+                    self.context.test_data.add_result(cmd['command'], 
+                        status, err_msg)
         except:
-                #FIXME: need to capture exceptions for later logging
-                #and try to continue from where we left off
-                self.context.test_data.job_status='fail'
-                raise
+            #Capture all user-defined and non-user-defined critical errors
+            self.context.test_data.job_status='fail'
+            raise
         finally:
-                if submit_results:
-                    params = submit_results.get('parameters', {})
-                    action = lava_commands[submit_results['command']](
-                        self.context)
-                    action.run(**params)
+            if submit_results:
+                params = submit_results.get('parameters', {})
+                action = lava_commands[submit_results['command']](
+                    self.context)
+                action.run(**params)
 
 
 class LavaContext(object):
@@ -112,8 +139,9 @@ 
     def job_status(self, status):
         self._job_status = status
 
-    def add_result(self, test_case_id, result):
-        result_data = { 'test_case_id': test_case_id, 'result':result }
+    def add_result(self, test_case_id, result, message=""):
+        result_data = { 'test_case_id': test_case_id, 'result': result \
+                    , 'message': message}
         self._test_run['test_results'].append(result_data)
 
     def add_attachment(self, attachment):

=== modified file 'lava_dispatcher/actions/android_0xbench.py'
--- lava_dispatcher/actions/android_0xbench.py	2011-06-27 04:55:08 +0000
+++ lava_dispatcher/actions/android_0xbench.py	2011-07-20 06:53:10 +0000
@@ -47,5 +47,6 @@ 
             self.client.android_logcat_monitor(pattern, timeout = 1200)
         except pexpect.TIMEOUT:
             print "0xbench Test: TIMEOUT Fail"
-
-        self.client.android_logcat_stop()
+            raise
+        finally:
+            self.client.android_logcat_stop()

=== modified file 'lava_dispatcher/actions/android_deploy.py'
--- lava_dispatcher/actions/android_deploy.py	2011-07-06 17:16:22 +0000
+++ lava_dispatcher/actions/android_deploy.py	2011-07-20 10:08:14 +0000
@@ -25,6 +25,7 @@ 
 import shutil
 from tempfile import mkdtemp
 from lava_dispatcher.utils import download, download_with_cache
+from lava_dispatcher.client import CriticalError
 
 class cmd_deploy_linaro_android_image(BaseAction):
     def run(self, boot, system, data, use_cache=True):
@@ -37,10 +38,16 @@ 
         client.boot_master_image()
 
         print "Waiting for network to come up"
-        client.wait_network_up()
+        try:
+            client.wait_network_up()
+        except:
+            raise CriticalError("Network can't probe up when deployment")
 
-        boot_tbz2, system_tbz2, data_tbz2 = self.download_tarballs(boot,
-            system, data, use_cache)
+        try:
+            boot_tbz2, system_tbz2, data_tbz2 = self.download_tarballs(boot,
+                system, data, use_cache)
+        except:
+            raise CriticalError("Package can't download when deployment")
 
         boot_tarball = boot_tbz2.replace(LAVA_IMAGE_TMPDIR, '')
         system_tarball = system_tbz2.replace(LAVA_IMAGE_TMPDIR, '')
@@ -58,7 +65,7 @@ 
             self.deploy_linaro_android_testrootfs(system_url)
             self.purge_linaro_android_sdcard()
         except:
-            raise
+            raise CriticalError("Android deployment failed")
         finally:
             shutil.rmtree(self.tarball_dir)
 

=== modified file 'lava_dispatcher/actions/deploy.py'
--- lava_dispatcher/actions/deploy.py	2011-07-06 17:16:22 +0000
+++ lava_dispatcher/actions/deploy.py	2011-07-20 09:41:30 +0000
@@ -27,6 +27,7 @@ 
 from lava_dispatcher.actions import BaseAction
 from lava_dispatcher.config import LAVA_IMAGE_TMPDIR, LAVA_IMAGE_URL, MASTER_STR
 from lava_dispatcher.utils import download, download_with_cache
+from lava_dispatcher.client import CriticalError
 
 
 class cmd_deploy_linaro_image(BaseAction):
@@ -39,8 +40,16 @@ 
         client.boot_master_image()
 
         print "Waiting for network to come up"
-        client.wait_network_up()
-        boot_tgz, root_tgz = self.generate_tarballs(hwpack, rootfs, use_cache)
+        try:
+            client.wait_network_up()
+        except:
+            raise CriticalError("Network can't probe up when deployment")
+
+        try:
+            boot_tgz, root_tgz = self.generate_tarballs(hwpack, rootfs, 
+                use_cache)
+        except:
+            raise CriticalError("Deployment tarballs preparation failed")
         boot_tarball = boot_tgz.replace(LAVA_IMAGE_TMPDIR, '')
         root_tarball = root_tgz.replace(LAVA_IMAGE_TMPDIR, '')
         boot_url = '/'.join(u.strip('/') for u in [
@@ -51,7 +60,7 @@ 
             self.deploy_linaro_rootfs(root_url)
             self.deploy_linaro_bootfs(boot_url)
         except:
-            raise
+            raise CriticalError("Deployment failed")
         finally:
             shutil.rmtree(self.tarball_dir)
 

=== modified file 'lava_dispatcher/actions/lava-test.py'
--- lava_dispatcher/actions/lava-test.py	2011-07-06 18:47:51 +0000
+++ lava_dispatcher/actions/lava-test.py	2011-07-20 05:27:33 +0000
@@ -92,7 +92,7 @@ 
         try:
             client.run_shell_command(
                 'chroot /mnt/root lava-test help',
-                response="list-tests")
+                response="list-tests", timeout=10)
         except:
             raise OperationFailed("lava-test deployment failed")
 

=== modified file 'lava_dispatcher/android_client.py'
--- lava_dispatcher/android_client.py	2011-06-29 10:22:40 +0000
+++ lava_dispatcher/android_client.py	2011-07-20 04:45:23 +0000
@@ -19,7 +19,7 @@ 
 
 import pexpect
 import sys
-from lava_dispatcher.client import LavaClient
+from lava_dispatcher.client import LavaClient, OperationFailed
 from lava_dispatcher.android_config import BOARDS, TESTER_STR
 
 class LavaAndroidClient(LavaClient):

=== modified file 'lava_dispatcher/client.py'
--- lava_dispatcher/client.py	2011-07-01 06:08:43 +0000
+++ lava_dispatcher/client.py	2011-07-20 05:27:33 +0000
@@ -153,15 +153,27 @@ 
     def getvalue(self):
         return self.serialio.getvalue()
 
-
-class NetworkError(Exception):
+class DispatcherError(Exception):
+    """
+    Base exception and error class for dispatcher
+    """
+
+class CriticalError(DispatcherError):
+    """
+    The critical error
+    """
+
+class GeneralError(DispatcherError):
+    """
+    The non-critical error
+    """
+
+class NetworkError(CriticalError):
     """
     This is used when a network error occurs, such as failing to bring up
     the network interface on the client
     """
 
-
-class OperationFailed(Exception):
+class OperationFailed(GeneralError):
     pass
 
-