[Branch ~linaro-validation/lava-scheduler/trunk] Rev 257: Neil Williams 2013-09-02 Ensure there is an actual device before trying

Message ID	20130903110541.17878.98750.launchpad@ackee.canonical.com
State	Accepted
Headers	show Return-Path: <patchwork-forward+bncBC73BIMB7QIBBCEFS6IQKGQEPJH4MEY@linaro.org> Received-SPF: neutral (google.com: 209.85.212.51 is neither permitted nor denied by best guess record for domain of patch+caf_=patchwork-forward=linaro.org@linaro.org) client-ip=209.85.212.51; Received-SPF: pass (google.com: best guess record for domain of bounces@canonical.com designates 91.189.90.7 as permitted sender) client-ip=91.189.90.7; MIME-Version: 1.0 To: Linaro Patch Tracker <patches@linaro.org> From: noreply@launchpad.net Subject: [Branch ~linaro-validation/lava-scheduler/trunk] Rev 257: Neil Williams 2013-09-02 Ensure there is an actual device before trying Message-Id: <20130903110541.17878.98750.launchpad@ackee.canonical.com> Date: Tue, 03 Sep 2013 11:05:41 -0000 Reply-To: noreply@launchpad.net Sender: bounces@canonical.com Errors-To: bounces@canonical.com Precedence: list Mailing-list: list patchwork-forward@linaro.org; contact patchwork-forward+owners@linaro.org Content-Type: multipart/mixed; boundary="===============1610924345586546510=="

=== modified file 'lava_scheduler_app/admin.py' --- lava_scheduler_app/admin.py 2013-08-28 15:13:07 +0000 +++ lava_scheduler_app/admin.py 2013-09-02 15:14:15 +0000 @@ -8,7 +8,7 @@ def offline_action(modeladmin, request, queryset): - for device in queryset.filter(status__in=[Device.IDLE, Device.RUNNING]): + for device in queryset.filter(status__in=[Device.IDLE, Device.RUNNING, Device.RESERVED]): if device.can_admin(request.user): device.put_into_maintenance_mode(request.user, "admin action") offline_action.short_description = "take offline" === modified file 'lava_scheduler_app/api.py' --- lava_scheduler_app/api.py 2013-08-28 15:13:07 +0000 +++ lava_scheduler_app/api.py 2013-09-02 15:14:15 +0000 @@ -2,7 +2,6 @@ from simplejson import JSONDecodeError from django.db.models import Count from linaro_django_xmlrpc.models import ExposedAPI -from lava_scheduler_app import utils from lava_scheduler_app.models import ( Device, DeviceType, @@ -165,8 +164,8 @@ .annotate(idle=SumIf('device', condition='status=%s' % Device.IDLE), offline=SumIf('device', condition='status in (%s,%s)' % (Device.OFFLINE, Device.OFFLINING)), - busy=SumIf('device', condition='status=%s' - % Device.RUNNING), ).order_by('name') + busy=SumIf('device', condition='status in (%s,%s)' + % (Device.RUNNING, Device.RESERVED)), ).order_by('name') for dev_type in device_types: device_type = {} === modified file 'lava_scheduler_app/models.py' --- lava_scheduler_app/models.py 2013-08-28 15:13:07 +0000 +++ lava_scheduler_app/models.py 2013-09-02 15:42:27 +0000 @@ -51,19 +51,20 @@ def check_device_availability(requested_devices): - """Checks whether the number of devices requested is available. + """Checks whether the number of devices requested is available for a multinode job. See utils.requested_device_count() for details of REQUESTED_DEVICES dictionary format. - Returns True if the requested number of devices are available, else - raises DevicesUnavailableException. 
+ Returns True for singlenode or if the requested number of devices are available + for the multinode job, else raises DevicesUnavailableException. """ device_types = DeviceType.objects.values_list('name').filter( - models.Q(device__status=Device.IDLE) | \ - models.Q(device__status=Device.RUNNING) + models.Q(device__status=Device.IDLE) | + models.Q(device__status=Device.RUNNING) | + models.Q(device__status=Device.RESERVED) ).annotate( - num_count=models.Count('name') + num_count=models.Count('name') ).order_by('name') if requested_devices: @@ -115,6 +116,7 @@ RUNNING = 2 OFFLINING = 3 RETIRED = 4 + RESERVED = 5 STATUS_CHOICES = ( (OFFLINE, 'Offline'), @@ -122,6 +124,7 @@ (RUNNING, 'Running'), (OFFLINING, 'Going offline'), (RETIRED, 'Retired'), + (RESERVED, 'Reserved') ) # A device health shows a device is ready to test or not @@ -201,7 +204,7 @@ return user.has_perm('lava_scheduler_app.change_device') def put_into_maintenance_mode(self, user, reason): - if self.status in [self.RUNNING, self.OFFLINING]: + if self.status in [self.RUNNING, self.RESERVED, self.OFFLINING]: new_status = self.OFFLINING else: new_status = self.OFFLINE @@ -236,6 +239,16 @@ self.health_status = Device.HEALTH_LOOPING self.save() + def cancel_reserved_status(self, user, reason): + if self.status != Device.RESERVED: + return + new_status = self.IDLE + DeviceStateTransition.objects.create( + created_by=user, device=self, old_state=self.status, + new_state=new_status, message=reason, job=None).save() + self.status = new_status + self.save() + class JobFailureTag(models.Model): """ @@ -324,7 +337,7 @@ tags = models.ManyToManyField(Tag, blank=True) - # This is set once the job starts. + # This is set once the job starts or is reserved. actual_device = models.ForeignKey( Device, null=True, default=None, related_name='+', blank=True) @@ -598,6 +611,10 @@ return self._can_admin(user) and self.status in states def cancel(self): + # if SUBMITTED with actual_device - clear the actual_device back to idle. 
+ if self.status == TestJob.SUBMITTED and self.actual_device is not None: + device = Device.objects.get(hostname=self.actual_device) + device.cancel_reserved_status(self.submitter, "multinode-cancel") if self.status == TestJob.RUNNING: self.status = TestJob.CANCELING else: === modified file 'lava_scheduler_app/views.py' --- lava_scheduler_app/views.py 2013-08-28 15:13:07 +0000 +++ lava_scheduler_app/views.py 2013-09-02 15:14:15 +0000 @@ -371,7 +371,8 @@ .annotate(idle=SumIf('device', condition='status=%s' % Device.IDLE), offline=SumIf('device', condition='status in (%s,%s)' % (Device.OFFLINE, Device.OFFLINING)), - busy=SumIf('device', condition='status=%s' % Device.RUNNING),).order_by('name') + busy=SumIf('device', condition='status in (%s,%s)' % + (Device.RUNNING, Device.RESERVED)),).order_by('name') def render_status(self, record): return "%s idle, %s offline, %s busy" % (record.idle, @@ -535,7 +536,7 @@ 'health_jobs', reverse(health_jobs_json, kwargs=dict(pk=pk)), params=(device,)), 'show_maintenance': device.can_admin(request.user) and - device.status in [Device.IDLE, Device.RUNNING], + device.status in [Device.IDLE, Device.RUNNING, Device.RESERVED], 'show_online': device.can_admin(request.user) and device.status in [Device.OFFLINE, Device.OFFLINING], 'bread_crumb_trail': BreadCrumbTrail.leading_to(health_job_list, pk=pk), @@ -993,7 +994,7 @@ 'jobs', reverse(recent_jobs_json, kwargs=dict(pk=device.pk)), params=(device,)), 'show_maintenance': device.can_admin(request.user) and - device.status in [Device.IDLE, Device.RUNNING], + device.status in [Device.IDLE, Device.RUNNING, Device.RESERVED], 'show_online': device.can_admin(request.user) and device.status in [Device.OFFLINE, Device.OFFLINING], 'bread_crumb_trail': BreadCrumbTrail.leading_to(device_detail, pk=pk), === modified file 'lava_scheduler_daemon/dbjobsource.py' --- lava_scheduler_daemon/dbjobsource.py 2013-08-31 01:38:21 +0000 +++ lava_scheduler_daemon/dbjobsource.py 2013-09-02 18:14:25 +0000 @@ -129,14 
+129,18 @@ def _fix_device(self, device, job): """Associate an available/idle DEVICE to the given JOB. + If the MultiNode job is waiting as Submitted, the device + could be running a different job. Returns the job with actual_device set to DEVICE. If we are unable to grab the DEVICE then we return None. """ + if device.status == Device.RUNNING: + return None DeviceStateTransition.objects.create( created_by=None, device=device, old_state=device.status, - new_state=Device.RUNNING, message=None, job=job).save() - device.status = Device.RUNNING + new_state=Device.RESERVED, message=None, job=job).save() + device.status = Device.RESERVED device.current_job = job try: # The unique constraint on current_job may cause this to @@ -190,10 +194,10 @@ for d in devices: self.logger.debug("Checking %s" % d.hostname) if d.hostname in configured_boards: - if job: - job = self._fix_device(d, job) - if job: - job_list.add(job) + if job: + job = self._fix_device(d, job) + if job: + job_list.add(job) # Remove scheduling multinode jobs until all the jobs in the # target_group are assigned devices. 
@@ -288,6 +292,14 @@ def getJobDetails_impl(self, job): job.status = TestJob.RUNNING + # need to set the device RUNNING if device was RESERVED + if job.actual_device.status == Device.RESERVED: + DeviceStateTransition.objects.create( + created_by=None, device=job.actual_device, old_state=job.actual_device.status, + new_state=Device.RUNNING, message=None, job=job).save() + job.actual_device.status = Device.RUNNING + job.actual_device.current_job = job + job.actual_device.save() job.start_time = datetime.datetime.utcnow() shutil.rmtree(job.output_dir, ignore_errors=True) job.log_file.save('job-%s.log' % job.id, ContentFile(''), save=False) @@ -316,6 +328,8 @@ device.status = Device.IDLE elif device.status == Device.OFFLINING: device.status = Device.OFFLINE + elif device.status == Device.RESERVED: + device.status = Device.IDLE else: self.logger.error( "Unexpected device state in jobCompleted: %s" % device.status) === modified file 'lava_scheduler_daemon/service.py' --- lava_scheduler_daemon/service.py 2013-08-30 18:07:18 +0000 +++ lava_scheduler_daemon/service.py 2013-09-02 18:14:53 +0000 @@ -47,7 +47,7 @@ x.hostname for x in dispatcher_config.get_devices()] for job in job_list: - if job.actual_device.hostname in configured_boards: + if job.actual_device and job.actual_device.hostname in configured_boards: new_job = JobRunner(self.source, job, self.dispatcher, self.reactor, self.daemon_options) self.logger.info("Starting Job: %d " % job.id)

[Branch ~linaro-validation/lava-scheduler/trunk] Rev 257: Neil Williams 2013-09-02 Ensure there is an actual device before trying

Commit Message

Patch