diff mbox

[Branch,~linaro-validation/lava-scheduler/trunk] Rev 64: If the connection to the DB fails or drops for the scheduler daemon, close the

Message ID 20110818022712.28308.88849.launchpad@ackee.canonical.com
State Accepted
Headers show

Commit Message

Michael-Doyle Hudson Aug. 18, 2011, 2:27 a.m. UTC
Merge authors:
  Michael Hudson-Doyle (mwhudson)
------------------------------------------------------------
revno: 64 [merge]
committer: Michael-Doyle Hudson <michael.hudson@linaro.org>
branch nick: trunk
timestamp: Thu 2011-08-18 14:21:39 +1200
message:
  If the connection to the DB fails or drops for the scheduler daemon, close the
  connection so that we attempt to reopen it on the next access.
  
  Also improve the errbacks that handle errors to log the traceback as well.
modified:
  lava_scheduler_daemon/board.py
  lava_scheduler_daemon/dbjobsource.py
  lava_scheduler_daemon/service.py


--
lp:lava-scheduler
https://code.launchpad.net/~linaro-validation/lava-scheduler/trunk

You are subscribed to branch lp:lava-scheduler.
To unsubscribe from this branch go to https://code.launchpad.net/~linaro-validation/lava-scheduler/trunk/+edit-subscription
diff mbox

Patch

=== modified file 'lava_scheduler_daemon/board.py'
--- lava_scheduler_daemon/board.py	2011-08-16 04:07:08 +0000
+++ lava_scheduler_daemon/board.py	2011-08-18 02:19:55 +0000
@@ -199,7 +199,9 @@ 
             self._maybeStartJob, self._ebCheckForJob)
 
     def _ebCheckForJob(self, result):
-        self.logger.exception(result.value)
+        self.logger.error(
+            '%s: %s\n%s', result.type.__name__, result.value,
+            result.getTraceback())
         self._maybeStartJob(None)
 
     def _finish_stop(self):

=== modified file 'lava_scheduler_daemon/dbjobsource.py'
--- lava_scheduler_daemon/dbjobsource.py	2011-08-17 03:09:36 +0000
+++ lava_scheduler_daemon/dbjobsource.py	2011-08-18 02:19:55 +0000
@@ -3,8 +3,10 @@ 
 import logging
 
 from django.core.files.base import ContentFile
+from django.db import connection
 from django.db import IntegrityError, transaction
 from django.db.models import Q
+from django.db.utils import DatabaseError
 
 from twisted.internet.threads import deferToThread
 
@@ -13,6 +15,13 @@ 
 from lava_scheduler_app.models import Device, TestJob
 from lava_scheduler_daemon.jobsource import IJobSource
 
+try:
+    from psycopg2 import InterfaceError, OperationalError
+except ImportError:
+    class InterfaceError(Exception):
+        pass
+    class OperationalError(Exception):
+        pass
 
 
 class DatabaseJobSource(object):
@@ -24,8 +33,28 @@ 
     def getBoardList_impl(self):
         return [d.hostname for d in Device.objects.all()]
 
+    def deferForDB(self, func, *args, **kw):
+        def wrapper(*args, **kw):
+            try:
+                return func(*args, **kw)
+            except (DatabaseError, OperationalError, InterfaceError), error:
+                message = str(error)
+                if message == 'connection already closed' or \
+                   message.startswith(
+                    'terminating connection due to administrator command') or \
+                   message.startswith(
+                    'could not connect to server: Connection refused'):
+                    self.logger.warning(
+                        'Forcing reconnection on next db access attempt')
+                    if connection.connection:
+                        if not connection.connection.closed:
+                            connection.connection.close()
+                        connection.connection = None
+                raise
+        return deferToThread(wrapper, *args, **kw)
+
     def getBoardList(self):
-        return deferToThread(self.getBoardList_impl)
+        return self.deferForDB(self.getBoardList_impl)
 
     @transaction.commit_manually()
     def getJobForBoard_impl(self, board_name):
@@ -79,7 +108,7 @@ 
                 return None
 
     def getJobForBoard(self, board_name):
-        return deferToThread(self.getJobForBoard_impl, board_name)
+        return self.deferForDB(self.getJobForBoard_impl, board_name)
 
     @transaction.commit_on_success()
     def jobCompleted_impl(self, board_name):
@@ -94,7 +123,7 @@ 
         job.save()
 
     def jobCompleted(self, board_name):
-        return deferToThread(self.jobCompleted_impl, board_name)
+        return self.deferForDB(self.jobCompleted_impl, board_name)
 
     @transaction.commit_on_success()
     def jobOobData_impl(self, board_name, key, value):
@@ -106,5 +135,4 @@ 
             device.current_job.save()
 
     def jobOobData(self, board_name, key, value):
-        return deferToThread(self.jobOobData_impl, board_name, key, value)
-
+        return self.deferForDB(self.jobOobData_impl, board_name, key, value)

=== modified file 'lava_scheduler_daemon/service.py'
--- lava_scheduler_daemon/service.py	2011-07-27 06:59:33 +0000
+++ lava_scheduler_daemon/service.py	2011-08-17 05:33:25 +0000
@@ -21,7 +21,12 @@ 
 
     def _updateBoards(self):
         self.logger.debug("Refreshing board list")
-        return self.source.getBoardList().addCallback(self._cbUpdateBoards)
+        def _eb(failure):
+            self.logger.error(
+                '%s: %s\n%s', failure.type.__name__, failure.value,
+                failure.getTraceback())
+        return self.source.getBoardList().addCallback(
+            self._cbUpdateBoards).addErrback(_eb)
 
     def _cbUpdateBoards(self, board_names):
         if set(board_names) == set(self.boards):