[v2,2/6] cgroup/cpuset: Clarify the use of invalid partition root

Message ID	20210621184924.27493-3-longman@redhat.com
State	New
Headers	show Return-Path: <linux-kselftest-owner@kernel.org> From: Waiman Long <longman@redhat.com> To: Tejun Heo <tj@kernel.org>, Zefan Li <lizefan.x@bytedance.com>, Johannes Weiner <hannes@cmpxchg.org>, Jonathan Corbet <corbet@lwn.net>, Shuah Khan <shuah@kernel.org> Cc: cgroups@vger.kernel.org, linux-kernel@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Andrew Morton <akpm@linux-foundation.org>, Roman Gushchin <guro@fb.com>, Phil Auld <pauld@redhat.com>, Peter Zijlstra <peterz@infradead.org>, Juri Lelli <juri.lelli@redhat.com>, Waiman Long <longman@redhat.com> Subject: [PATCH v2 2/6] cgroup/cpuset: Clarify the use of invalid partition root Date: Mon, 21 Jun 2021 14:49:20 -0400 Message-Id: <20210621184924.27493-3-longman@redhat.com> In-Reply-To: <20210621184924.27493-1-longman@redhat.com> References: <20210621184924.27493-1-longman@redhat.com> Precedence: bulk
Series	cgroup/cpuset: Add new cpuset partition type & empty effecitve cpus \| expand [v2,0/6] cgroup/cpuset: Add new cpuset partition type & empty effecitve cpus [v2,1/6] cgroup/cpuset: Miscellaneous code cleanup [v2,2/6] cgroup/cpuset: Clarify the use of invalid partition root [v2,3/6] cgroup/cpuset: Add a new isolated cpus.partition type [v2,4/6] cgroup/cpuset: Allow non-top parent partition root to distribute out all CPUs [v2,5/6] cgroup/cpuset: Update description of cpuset.cpus.partition in cgroup-v2.rst [v2,6/6] kselftest/cgroup: Add cpuset v2 partition root state test

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index d4164e07c61b..3fe68d0f593d 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -174,7 +174,9 @@ struct cpuset { * subparts_cpus. In this case, the cpuset is not a real partition * root anymore. However, the CPU_EXCLUSIVE bit will still be set * and the cpuset can be restored back to a partition root if the - * parent cpuset can give more CPUs back to this child cpuset. + * parent cpuset can give more CPUs back to this child cpuset. A + * partition root becomes invalid when all its cpus become unavailable + * like being offlined. */ #define PRS_DISABLED 0 #define PRS_ENABLED 1 @@ -1193,6 +1195,15 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, /* * partcmd_update with newmask: * + * Return error if newmask isn't a subset of + * (cpus_allowed | parent->effective_cpus). + */ + cpumask_or(tmp->addmask, cpuset->cpus_allowed, + parent->effective_cpus); + if (!cpumask_subset(newmask, tmp->addmask)) + return -EINVAL; + + /* * delmask = cpus_allowed & ~newmask & parent->subparts_cpus * addmask = newmask & parent->effective_cpus * & ~parent->subparts_cpus @@ -1205,7 +1216,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, adding = cpumask_andnot(tmp->addmask, tmp->addmask, parent->subparts_cpus); /* - * Return error if the new effective_cpus could become empty. + * Return error if parent's effective_cpus could become empty. */ if (adding && cpumask_equal(parent->effective_cpus, tmp->addmask)) { @@ -1221,20 +1232,35 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, return -EINVAL; cpumask_copy(tmp->addmask, parent->effective_cpus); } + + /* + * Return error if effective_cpus becomes empty or any CPU + * distributed to child partitions is deleted. + */ + if (deleting && + (cpumask_intersects(tmp->delmask, cpuset->subparts_cpus) || + cpumask_equal(tmp->delmask, cpuset->effective_cpus))) + return -EBUSY; } else { /* * partcmd_update w/o newmask: * * addmask = cpus_allowed & parent->effective_cpus * + * This gets invoked either due to a hotplug event or + * from update_cpumasks_hier() where we can't return an + * error. This can cause a partition root to become invalid + * in the case of a hotplug. + * * Note that parent's subparts_cpus may have been * pre-shrunk in case there is a change in the cpu list. * So no deletion is needed. */ adding = cpumask_and(tmp->addmask, cpuset->cpus_allowed, parent->effective_cpus); - part_error = cpumask_equal(tmp->addmask, - parent->effective_cpus); + part_error = (is_partition_root(cpuset) && + !parent->nr_subparts_cpus) || + cpumask_equal(tmp->addmask, parent->effective_cpus); } if (cmd == partcmd_update) { @@ -1392,10 +1418,6 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) * When parent is invalid, it has to be too. */ cp->partition_root_state = PRS_ERROR; - if (cp->nr_subparts_cpus) { - cp->nr_subparts_cpus = 0; - cpumask_clear(cp->subparts_cpus); - } break; } } @@ -1406,38 +1428,32 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) spin_lock_irq(&callback_lock); - cpumask_copy(cp->effective_cpus, tmp->new_cpus); if (cp->nr_subparts_cpus && (cp->partition_root_state != PRS_ENABLED)) { + /* + * Put all active subparts_cpus back to effective_cpus. + */ + cpumask_or(tmp->new_cpus, tmp->new_cpus, + cp->subparts_cpus); + cpumask_and(tmp->new_cpus, tmp->new_cpus, + cpu_active_mask); cp->nr_subparts_cpus = 0; cpumask_clear(cp->subparts_cpus); - } else if (cp->nr_subparts_cpus) { + } + + cpumask_copy(cp->effective_cpus, tmp->new_cpus); + if (cp->nr_subparts_cpus) { /* * Make sure that effective_cpus & subparts_cpus - * are mutually exclusive. - * - * In the unlikely event that effective_cpus - * becomes empty. we clear cp->nr_subparts_cpus and - * let its child partition roots to compete for - * CPUs again. + * of a partition root are mutually exclusive. */ cpumask_andnot(cp->effective_cpus, cp->effective_cpus, cp->subparts_cpus); - if (cpumask_empty(cp->effective_cpus)) { - cpumask_copy(cp->effective_cpus, tmp->new_cpus); - cpumask_clear(cp->subparts_cpus); - cp->nr_subparts_cpus = 0; - } else if (!cpumask_subset(cp->subparts_cpus, - tmp->new_cpus)) { - cpumask_andnot(cp->subparts_cpus, - cp->subparts_cpus, tmp->new_cpus); - cp->nr_subparts_cpus - = cpumask_weight(cp->subparts_cpus); - } + WARN_ON_ONCE(cpumask_empty(cp->effective_cpus)); } spin_unlock_irq(&callback_lock); - WARN_ON(!is_in_v2_mode() && + WARN_ON_ONCE(!is_in_v2_mode() && !cpumask_equal(cp->cpus_allowed, cp->effective_cpus)); update_tasks_cpumask(cp); @@ -1560,8 +1576,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, * Make sure that subparts_cpus is a subset of cpus_allowed. */ if (cs->nr_subparts_cpus) { - cpumask_andnot(cs->subparts_cpus, cs->subparts_cpus, - cs->cpus_allowed); + cpumask_and(cs->subparts_cpus, cs->subparts_cpus, + cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } spin_unlock_irq(&callback_lock); @@ -1984,21 +2000,26 @@ static int update_prstate(struct cpuset *cs, int new_prs) cs->partition_root_state = PRS_ENABLED; } else { /* - * Turning off partition root will clear the - * CS_CPU_EXCLUSIVE bit. + * Switch back to member is always allowed if PRS_ERROR. */ if (cs->partition_root_state == PRS_ERROR) { - cs->partition_root_state = 0; - update_flag(CS_CPU_EXCLUSIVE, cs, 0); err = 0; - goto out; + goto reset_flags; } + /* + * A partition root cannot be reverted to member if some + * CPUs have been distributed to child partition roots. + */ + if (!cpumask_empty(cs->subparts_cpus)) + return -EBUSY; + err = update_parent_subparts_cpumask(cs, partcmd_disable, NULL, &tmpmask); if (err) goto out; +reset_flags: cs->partition_root_state = 0; /* Turning off CS_CPU_EXCLUSIVE will not return error */ @@ -3074,41 +3095,42 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) /* * In the unlikely event that a partition root has empty - * effective_cpus or its parent becomes erroneous, we have to - * transition it to the erroneous state. + * effective_cpus, we will have to force any child partitions, + * if present, to become invalid by setting nr_subparts_cpus to 0 + * without causing itself to become invalid. + */ + if (is_partition_root(cs) && cs->nr_subparts_cpus && + cpumask_empty(&new_cpus)) { + cs->nr_subparts_cpus = 0; + cpumask_clear(cs->subparts_cpus); + compute_effective_cpumask(&new_cpus, cs, parent); + } + + /* + * If empty effective_cpus or zero nr_subparts_cpus or its parent + * becomes erroneous, we have to transition it to the erroneous state. */ if (is_partition_root(cs) && (cpumask_empty(&new_cpus) || - (parent->partition_root_state == PRS_ERROR))) { + (parent->partition_root_state == PRS_ERROR) || + !parent->nr_subparts_cpus)) { + update_parent_subparts_cpumask(cs, partcmd_disable, + NULL, tmp); if (cs->nr_subparts_cpus) { cs->nr_subparts_cpus = 0; cpumask_clear(cs->subparts_cpus); compute_effective_cpumask(&new_cpus, cs, parent); } - - /* - * If the effective_cpus is empty because the child - * partitions take away all the CPUs, we can keep - * the current partition and let the child partitions - * fight for available CPUs. - */ - if ((parent->partition_root_state == PRS_ERROR) || - cpumask_empty(&new_cpus)) { - update_parent_subparts_cpumask(cs, partcmd_disable, - NULL, tmp); - cs->partition_root_state = PRS_ERROR; - } + cs->partition_root_state = PRS_ERROR; cpuset_force_rebuild(); } /* * On the other hand, an erroneous partition root may be transitioned - * back to a regular one or a partition root with no CPU allocated - * from the parent may change to erroneous. + * back to a regular one. */ - if (is_partition_root(parent) && - ((cs->partition_root_state == PRS_ERROR) || - !cpumask_intersects(&new_cpus, parent->subparts_cpus)) && - update_parent_subparts_cpumask(cs, partcmd_update, NULL, tmp)) + else if (is_partition_root(parent) && + (cs->partition_root_state == PRS_ERROR) && + update_parent_subparts_cpumask(cs, partcmd_update, NULL, tmp)) cpuset_force_rebuild(); update_tasks:

[v2,2/6] cgroup/cpuset: Clarify the use of invalid partition root

Commit Message

Comments

Patch