=== modified file 'lava_scheduler_daemon/board.py'
@@ -199,7 +199,9 @@
self._maybeStartJob, self._ebCheckForJob)
def _ebCheckForJob(self, result):
+ # Error path of the job check: `result` is treated as a failure object
+ # (its .type, .value and .getTraceback() are read).  Log the exception
+ # class name, its value and the traceback text, then continue through
+ # _maybeStartJob with no job.
- self.logger.exception(result.value)
+ self.logger.error(
+ '%s: %s\n%s', result.type.__name__, result.value,
+ result.getTraceback())
self._maybeStartJob(None)
def _finish_stop(self):
=== modified file 'lava_scheduler_daemon/dbjobsource.py'
@@ -3,8 +3,10 @@
import logging
from django.core.files.base import ContentFile
+from django.db import connection
from django.db import IntegrityError, transaction
from django.db.models import Q
+from django.db.utils import DatabaseError
from twisted.internet.threads import deferToThread
@@ -13,6 +15,13 @@
from lava_scheduler_app.models import Device, TestJob
from lava_scheduler_daemon.jobsource import IJobSource
+# psycopg2 is optional here: when it cannot be imported, define
+# placeholder exception classes under the same names so that code
+# naming InterfaceError/OperationalError in an except clause still loads.
+try:
+ from psycopg2 import InterfaceError, OperationalError
+except ImportError:
+ class InterfaceError(Exception):
+ pass
+ class OperationalError(Exception):
+ pass
class DatabaseJobSource(object):
@@ -24,8 +33,28 @@
def getBoardList_impl(self):
+ # Returns a list holding the hostname of every Device object.
return [d.hostname for d in Device.objects.all()]
+    def deferForDB(self, func, *args, **kw):
+        """Run ``func(*args, **kw)`` through deferToThread, dropping a dead
+        database connection when the call fails because of one.
+
+        If the call raises DatabaseError, OperationalError or
+        InterfaceError and the error message indicates that the connection
+        is gone, a warning is logged and the underlying connection object
+        is closed (if it is still open) and cleared, so the next database
+        access cannot reuse it.  The exception is re-raised in every case.
+
+        Returns whatever deferToThread returns.
+        """
+        # Message prefixes that indicate a lost or refused connection.
+        dead_connection_prefixes = (
+            'terminating connection due to administrator command',
+            'could not connect to server: Connection refused',
+        )
+
+        def wrapper(*args, **kw):
+            try:
+                return func(*args, **kw)
+            except (DatabaseError, OperationalError, InterfaceError), error:
+                message = str(error)
+                if message == 'connection already closed' or \
+                        message.startswith(dead_connection_prefixes):
+                    self.logger.warning(
+                        'Forcing reconnection on next db access attempt')
+                    if connection.connection:
+                        if not connection.connection.closed:
+                            connection.connection.close()
+                        # Clear the handle so it is not reused.
+                        connection.connection = None
+                # Always propagate the original error to the caller.
+                raise
+        return deferToThread(wrapper, *args, **kw)
+
def getBoardList(self):
+ # Run getBoardList_impl through deferForDB and return its result.
- return deferToThread(self.getBoardList_impl)
+ return self.deferForDB(self.getBoardList_impl)
@transaction.commit_manually()
def getJobForBoard_impl(self, board_name):
@@ -79,7 +108,7 @@
return None
def getJobForBoard(self, board_name):
+ # Run getJobForBoard_impl(board_name) through deferForDB and return
+ # its result.
- return deferToThread(self.getJobForBoard_impl, board_name)
+ return self.deferForDB(self.getJobForBoard_impl, board_name)
@transaction.commit_on_success()
def jobCompleted_impl(self, board_name):
@@ -94,7 +123,7 @@
job.save()
def jobCompleted(self, board_name):
+ # Run jobCompleted_impl(board_name) through deferForDB and return
+ # its result.
- return deferToThread(self.jobCompleted_impl, board_name)
+ return self.deferForDB(self.jobCompleted_impl, board_name)
@transaction.commit_on_success()
def jobOobData_impl(self, board_name, key, value):
@@ -106,5 +135,4 @@
device.current_job.save()
def jobOobData(self, board_name, key, value):
+ # Run jobOobData_impl(board_name, key, value) through deferForDB and
+ # return its result.
- return deferToThread(self.jobOobData_impl, board_name, key, value)
-
+ return self.deferForDB(self.jobOobData_impl, board_name, key, value)
=== modified file 'lava_scheduler_daemon/service.py'
@@ -21,7 +21,12 @@
def _updateBoards(self):
+ # Ask the job source for the board list and hand the result to
+ # _cbUpdateBoards; a failure anywhere in that chain goes to _eb.
self.logger.debug("Refreshing board list")
- return self.source.getBoardList().addCallback(self._cbUpdateBoards)
+ def _eb(failure):
+ # Log the exception class name, its value and the traceback text.
+ # Nothing is returned or re-raised from here.
+ self.logger.error(
+ '%s: %s\n%s', failure.type.__name__, failure.value,
+ failure.getTraceback())
+ return self.source.getBoardList().addCallback(
+ self._cbUpdateBoards).addErrback(_eb)
def _cbUpdateBoards(self, board_names):
if set(board_names) == set(self.boards):