@@ -1363,7 +1363,7 @@ int begin_new_exec(struct linux_binprm * bprm)
WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
flush_signal_handlers(me, 0);
- retval = set_cred_ucounts(bprm->cred);
+ retval = set_cred_ucounts(bprm->cred, NULL);
if (retval < 0)
goto out_unlock;
@@ -170,7 +170,7 @@ extern int set_security_override_from_ctx(struct cred *, const char *);
extern int set_create_files_as(struct cred *, struct inode *);
extern int cred_fscmp(const struct cred *, const struct cred *);
extern void __init cred_init(void);
-extern int set_cred_ucounts(struct cred *);
+extern int set_cred_ucounts(struct cred *, unsigned int *);
/*
* check for validity of credentials
@@ -370,7 +370,7 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
ret = create_user_ns(new);
if (ret < 0)
goto error_put;
- ret = set_cred_ucounts(new);
+ ret = set_cred_ucounts(new, NULL);
if (ret < 0)
goto error_put;
}
@@ -492,7 +492,7 @@ int commit_creds(struct cred *new)
/* do it
* RLIMIT_NPROC limits on user->processes have already been checked
- * in set_user().
+ * in set_cred_ucounts().
*/
alter_cred_subscribers(new, 2);
if (new->user != old->user || new->user_ns != old->user_ns)
@@ -663,7 +663,7 @@ int cred_fscmp(const struct cred *a, const struct cred *b)
}
EXPORT_SYMBOL(cred_fscmp);
-int set_cred_ucounts(struct cred *new)
+int set_cred_ucounts(struct cred *new, unsigned int *nproc_flags)
{
struct task_struct *task = current;
const struct cred *old = task->real_cred;
@@ -685,6 +685,24 @@ int set_cred_ucounts(struct cred *new)
new->ucounts = new_ucounts;
put_ucounts(old_ucounts);
+ if (!nproc_flags)
+ return 0;
+
+ /*
+ * We don't fail in case of NPROC limit excess here because too many
+ * poorly written programs don't check set*uid() return code, assuming
+ * it never fails if called by root. We may still enforce NPROC limit
+ * for programs doing set*uid()+execve() by harmlessly deferring the
+ * failure to the execve() stage.
+ */
+ if (ucounts_limit_cmp(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) >= 0 &&
+ new->user != INIT_USER &&
+ !security_capable(new, &init_user_ns, CAP_SYS_RESOURCE, CAP_OPT_NONE) &&
+ !security_capable(new, &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NONE))
+ *nproc_flags |= PF_NPROC_EXCEEDED;
+ else
+ *nproc_flags &= ~PF_NPROC_EXCEEDED;
+
return 0;
}
@@ -3051,7 +3051,7 @@ int ksys_unshare(unsigned long unshare_flags)
goto bad_unshare_cleanup_cred;
if (new_cred) {
- err = set_cred_ucounts(new_cred);
+ err = set_cred_ucounts(new_cred, NULL);
if (err)
goto bad_unshare_cleanup_cred;
}
@@ -472,21 +472,6 @@ static int set_user(struct cred *new)
if (!new_user)
return -EAGAIN;
- /*
- * We don't fail in case of NPROC limit excess here because too many
- * poorly written programs don't check set*uid() return code, assuming
- * it never fails if called by root. We may still enforce NPROC limit
- * for programs doing set*uid()+execve() by harmlessly deferring the
- * failure to the execve() stage.
- */
- if (ucounts_limit_cmp(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) >= 0 &&
- new_user != INIT_USER &&
- !security_capable(new, &init_user_ns, CAP_SYS_RESOURCE, CAP_OPT_NONE) &&
- !security_capable(new, &init_user_ns, CAP_SYS_ADMIN, CAP_OPT_NONE))
- current->flags |= PF_NPROC_EXCEEDED;
- else
- current->flags &= ~PF_NPROC_EXCEEDED;
-
free_uid(new->user);
new->user = new_user;
return 0;
@@ -560,7 +545,7 @@ long __sys_setreuid(uid_t ruid, uid_t euid)
if (retval < 0)
goto error;
- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, ¤t->flags);
if (retval < 0)
goto error;
@@ -622,7 +607,7 @@ long __sys_setuid(uid_t uid)
if (retval < 0)
goto error;
- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, ¤t->flags);
if (retval < 0)
goto error;
@@ -701,7 +686,7 @@ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
if (retval < 0)
goto error;
- retval = set_cred_ucounts(new);
+ retval = set_cred_ucounts(new, ¤t->flags);
if (retval < 0)
goto error;
@@ -1344,7 +1344,7 @@ static int userns_install(struct nsset *nsset, struct ns_common *ns)
put_user_ns(cred->user_ns);
set_cred_user_ns(cred, get_user_ns(user_ns));
- if (set_cred_ucounts(cred) < 0)
+ if (set_cred_ucounts(cred, NULL) < 0)
return -EINVAL;
return 0;
The generic idea is that not even root or capable user can force an unprivileged user's limit breach. (For historical and security reasons this check is postponed from set*uid to execve.) During the switch the resource consumption of target the user has to be checked. The commits 905ae01c4ae2 ("Add a reference to ucounts for each cred") and 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") made the check in set_user() look at the old user's consumption. This version of the fix simply moves the check to the place where the actual switch of the accounting structure happens -- set_cred_ucounts(). The other callers are kept without the check but with the per-userns accounting they may be newly subject to the check too. The set_cred_ucounts() becomes inconsistent since task->flags are passed by the caller but task_rlimit() is implicitly `current`'s, this patch is meant to illustrate the issue, nicer implementation is possible. Fixes: 21d1c5e386bc ("Reimplement RLIMIT_NPROC on top of ucounts") Signed-off-by: Michal Koutný <mkoutny@suse.com> --- fs/exec.c | 2 +- include/linux/cred.h | 2 +- kernel/cred.c | 24 +++++++++++++++++++++--- kernel/fork.c | 2 +- kernel/sys.c | 21 +++------------------ kernel/user_namespace.c | 2 +- 6 files changed, 28 insertions(+), 25 deletions(-)