=== modified file 'lava_scheduler_app/admin.py'
@@ -8,7 +8,7 @@
def offline_action(modeladmin, request, queryset):
- for device in queryset.filter(status__in=[Device.IDLE, Device.RUNNING]):
+ for device in queryset.filter(status__in=[Device.IDLE, Device.RUNNING, Device.RESERVED]):
if device.can_admin(request.user):
device.put_into_maintenance_mode(request.user, "admin action")
offline_action.short_description = "take offline"
=== modified file 'lava_scheduler_app/api.py'
@@ -2,7 +2,6 @@
from simplejson import JSONDecodeError
from django.db.models import Count
from linaro_django_xmlrpc.models import ExposedAPI
-from lava_scheduler_app import utils
from lava_scheduler_app.models import (
Device,
DeviceType,
@@ -165,8 +164,8 @@
.annotate(idle=SumIf('device', condition='status=%s' % Device.IDLE),
offline=SumIf('device', condition='status in (%s,%s)'
% (Device.OFFLINE, Device.OFFLINING)),
- busy=SumIf('device', condition='status=%s'
- % Device.RUNNING), ).order_by('name')
+ busy=SumIf('device', condition='status in (%s,%s)'
+ % (Device.RUNNING, Device.RESERVED)), ).order_by('name')
for dev_type in device_types:
device_type = {}
=== modified file 'lava_scheduler_app/models.py'
@@ -51,19 +51,20 @@
def check_device_availability(requested_devices):
- """Checks whether the number of devices requested is available.
+ """Checks whether the number of devices requested is available for a multinode job.
See utils.requested_device_count() for details of REQUESTED_DEVICES
dictionary format.
- Returns True if the requested number of devices are available, else
- raises DevicesUnavailableException.
+ Returns True for a single-node job, or if the requested number of devices is
+ available for the multinode job; otherwise raises DevicesUnavailableException.
"""
device_types = DeviceType.objects.values_list('name').filter(
- models.Q(device__status=Device.IDLE) | \
- models.Q(device__status=Device.RUNNING)
+ models.Q(device__status=Device.IDLE) |
+ models.Q(device__status=Device.RUNNING) |
+ models.Q(device__status=Device.RESERVED)
).annotate(
- num_count=models.Count('name')
+ num_count=models.Count('name')
).order_by('name')
if requested_devices:
@@ -115,6 +116,7 @@
RUNNING = 2
OFFLINING = 3
RETIRED = 4
+ RESERVED = 5
STATUS_CHOICES = (
(OFFLINE, 'Offline'),
@@ -122,6 +124,7 @@
(RUNNING, 'Running'),
(OFFLINING, 'Going offline'),
(RETIRED, 'Retired'),
+ (RESERVED, 'Reserved')
)
# A device health shows a device is ready to test or not
@@ -201,7 +204,7 @@
return user.has_perm('lava_scheduler_app.change_device')
def put_into_maintenance_mode(self, user, reason):
- if self.status in [self.RUNNING, self.OFFLINING]:
+ if self.status in [self.RUNNING, self.RESERVED, self.OFFLINING]:
new_status = self.OFFLINING
else:
new_status = self.OFFLINE
@@ -236,6 +239,16 @@
self.health_status = Device.HEALTH_LOOPING
self.save()
+    def cancel_reserved_status(self, user, reason):
+        """Release a RESERVED device back to IDLE and log the transition (no-op otherwise)."""
+        if self.status != self.RESERVED:
+            return
+        DeviceStateTransition.objects.create(  # create() already saves the row
+            created_by=user, device=self, old_state=self.status,
+            new_state=self.IDLE, message=reason, job=None)
+        self.status = self.IDLE
+        self.save()
+
class JobFailureTag(models.Model):
"""
@@ -324,7 +337,7 @@
tags = models.ManyToManyField(Tag, blank=True)
- # This is set once the job starts.
+ # This is set once the job starts or a device is reserved for it.
actual_device = models.ForeignKey(
Device, null=True, default=None, related_name='+', blank=True)
@@ -598,6 +611,10 @@
return self._can_admin(user) and self.status in states
def cancel(self):
+ # A SUBMITTED job with an actual_device may hold a reserved device; return that device to idle.
+ if self.status == TestJob.SUBMITTED and self.actual_device is not None:
+ device = Device.objects.get(hostname=self.actual_device)
+ device.cancel_reserved_status(self.submitter, "multinode-cancel")
if self.status == TestJob.RUNNING:
self.status = TestJob.CANCELING
else:
=== modified file 'lava_scheduler_app/views.py'
@@ -371,7 +371,8 @@
.annotate(idle=SumIf('device', condition='status=%s' % Device.IDLE),
offline=SumIf('device', condition='status in (%s,%s)' %
(Device.OFFLINE, Device.OFFLINING)),
- busy=SumIf('device', condition='status=%s' % Device.RUNNING),).order_by('name')
+ busy=SumIf('device', condition='status in (%s,%s)' %
+ (Device.RUNNING, Device.RESERVED)),).order_by('name')
def render_status(self, record):
return "%s idle, %s offline, %s busy" % (record.idle,
@@ -535,7 +536,7 @@
'health_jobs', reverse(health_jobs_json, kwargs=dict(pk=pk)),
params=(device,)),
'show_maintenance': device.can_admin(request.user) and
- device.status in [Device.IDLE, Device.RUNNING],
+ device.status in [Device.IDLE, Device.RUNNING, Device.RESERVED],
'show_online': device.can_admin(request.user) and
device.status in [Device.OFFLINE, Device.OFFLINING],
'bread_crumb_trail': BreadCrumbTrail.leading_to(health_job_list, pk=pk),
@@ -993,7 +994,7 @@
'jobs', reverse(recent_jobs_json, kwargs=dict(pk=device.pk)),
params=(device,)),
'show_maintenance': device.can_admin(request.user) and
- device.status in [Device.IDLE, Device.RUNNING],
+ device.status in [Device.IDLE, Device.RUNNING, Device.RESERVED],
'show_online': device.can_admin(request.user) and
device.status in [Device.OFFLINE, Device.OFFLINING],
'bread_crumb_trail': BreadCrumbTrail.leading_to(device_detail, pk=pk),
=== modified file 'lava_scheduler_daemon/dbjobsource.py'
@@ -129,14 +129,18 @@
def _fix_device(self, device, job):
"""Associate an available/idle DEVICE to the given JOB.
+ If the MultiNode job is waiting as Submitted, the device
+ could be running a different job.
Returns the job with actual_device set to DEVICE.
If we are unable to grab the DEVICE then we return None.
"""
+ if device.status == Device.RUNNING:
+ return None
DeviceStateTransition.objects.create(
created_by=None, device=device, old_state=device.status,
- new_state=Device.RUNNING, message=None, job=job).save()
- device.status = Device.RUNNING
+ new_state=Device.RESERVED, message=None, job=job).save()
+ device.status = Device.RESERVED
device.current_job = job
try:
# The unique constraint on current_job may cause this to
@@ -190,10 +194,10 @@
for d in devices:
self.logger.debug("Checking %s" % d.hostname)
if d.hostname in configured_boards:
- if job:
- job = self._fix_device(d, job)
- if job:
- job_list.add(job)
+ if job:
+ job = self._fix_device(d, job)
+ if job:
+ job_list.add(job)
# Remove scheduling multinode jobs until all the jobs in the
# target_group are assigned devices.
@@ -288,6 +292,14 @@
def getJobDetails_impl(self, job):
job.status = TestJob.RUNNING
+ # A device that was RESERVED for this job must be moved to RUNNING now that the job starts.
+ if job.actual_device.status == Device.RESERVED:
+ DeviceStateTransition.objects.create(
+ created_by=None, device=job.actual_device, old_state=job.actual_device.status,
+ new_state=Device.RUNNING, message=None, job=job).save()
+ job.actual_device.status = Device.RUNNING
+ job.actual_device.current_job = job
+ job.actual_device.save()
job.start_time = datetime.datetime.utcnow()
shutil.rmtree(job.output_dir, ignore_errors=True)
job.log_file.save('job-%s.log' % job.id, ContentFile(''), save=False)
@@ -316,6 +328,8 @@
device.status = Device.IDLE
elif device.status == Device.OFFLINING:
device.status = Device.OFFLINE
+ elif device.status == Device.RESERVED:
+ device.status = Device.IDLE
else:
self.logger.error(
"Unexpected device state in jobCompleted: %s" % device.status)
=== modified file 'lava_scheduler_daemon/service.py'
@@ -47,7 +47,7 @@
x.hostname for x in dispatcher_config.get_devices()]
for job in job_list:
- if job.actual_device.hostname in configured_boards:
+ if job.actual_device and job.actual_device.hostname in configured_boards:
new_job = JobRunner(self.source, job, self.dispatcher,
self.reactor, self.daemon_options)
self.logger.info("Starting Job: %d " % job.id)