diff mbox series

[RFC,6/6] selftests: Test RLIMIT_NPROC in clone-created user namespaces

Message ID 20220207121800.5079-7-mkoutny@suse.com
State New
Headers show
Series RLIMIT_NPROC in ucounts fixups | expand

Commit Message

Michal Koutný Feb. 7, 2022, 12:18 p.m. UTC
Verify RLIMIT_NPROC observance in user namespaces also in the
clone(CLONE_NEWUSER) path.
Note that such a user_ns is created by the privileged user.

Signed-off-by: Michal Koutný <mkoutny@suse.com>
---
 .../selftests/rlimits/rlimits-per-userns.c    | 141 +++++++++++++-----
 1 file changed, 101 insertions(+), 40 deletions(-)

Comments

Michal Koutný Feb. 15, 2022, 9:34 a.m. UTC | #1
On Wed, Feb 09, 2022 at 06:25:34PM -0700, Shuah Khan <skhan@linuxfoundation.org> wrote:
> Does this test run in non-privileged user mode? If it doesn't,
> let's add a check and skip the test.

It requires user namespaces created by the privileged user (to bypass
RLIMIT_NPROC on the top level).

I'll add the check to the code.

Michal
diff mbox series

Patch

diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
index 54c1b345e42b..46f4cff36b30 100644
--- a/tools/testing/selftests/rlimits/rlimits-per-userns.c
+++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
@@ -1,6 +1,7 @@ 
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Author: Alexey Gladkov <gladkov.alexey@gmail.com>
+ * Author: Michal Koutný <mkoutny@suse.com>
  */
 #define _GNU_SOURCE
 #include <sys/types.h>
@@ -25,16 +26,25 @@ 
 
 #define THE_LIMIT 4
 #define NR_CHILDREN 5
+#define STACK_SIZE (2 * (1<<20))
 
-static_assert(NR_CHILDREN >= THE_LIMIT-1, "Need slots for limit-1 children.");
+static_assert(NR_CHILDREN >= THE_LIMIT-1, "Need slots for THE_LIMIT-1 children.");
 
-static char *service_prog;
 static uid_t user   = 60000;
 static uid_t group  = 60000;
 static struct rlimit saved_limit;
 
-/* Two uses: main and service */
-static pid_t child[NR_CHILDREN];
+enum userns_mode {
+	UM_UNSHARE,		/* setrlimit,clone(0),setuid,unshare,execve */
+	UM_CLONE_NEWUSER,	/* setrlimit,clone(NEWUSER),setuid,execve */
+};
+static struct {
+	int control_fd;
+	char *pathname;
+	enum userns_mode mode;
+} child_args;
+
+/* Cache current pid */
 static pid_t pid;
 
 static void setrlimit_nproc(rlim_t n)
@@ -60,6 +70,7 @@  static void restore_rlimit_nproc(void)
 }
 
 enum msg_sync {
+	MAP_DEFINE,
 	UNSHARE,
 	RLIMIT_RESTORE,
 };
@@ -80,15 +91,32 @@  static void sync_wait(int fd, enum msg_sync m)
 		warnx("(pid=%d): failed sync-read", pid);
 }
 
-static pid_t fork_child(int control_fd)
+static int define_maps(pid_t child_pid)
 {
-	pid_t new_pid = fork();
+	FILE *f;
+	char filename[PATH_MAX];
 
-	if (new_pid < 0)
-		err(EXIT_FAILURE, "fork");
+	if (child_args.mode != UM_CLONE_NEWUSER)
+		return 0;
+
+	snprintf(filename, PATH_MAX, "/proc/%i/uid_map", child_pid);
+	f = fopen(filename, "w");
+	if (fprintf(f, "%i %i 1\n", user, user) < 0)
+		return -1;
+	fclose(f);
+
+	snprintf(filename, PATH_MAX, "/proc/%i/gid_map", child_pid);
+	f = fopen(filename, "w");
+	if (fprintf(f, "%i %i 1\n", group, group) < 0)
+		return -1;
+	fclose(f);
+
+	return 0;
+}
 
-	if (new_pid > 0)
-		return new_pid;
+static int setup_and_exec(void *arg)
+{
+	int control_fd = child_args.control_fd;
 
 	pid = getpid();
 	warnx("(pid=%d): New process starting ...", pid);
@@ -98,6 +126,7 @@  static pid_t fork_child(int control_fd)
 
 	signal(SIGUSR1, SIG_DFL);
 
+	sync_wait(control_fd, RLIMIT_RESTORE);
 	warnx("(pid=%d): Changing to uid=%d, gid=%d", pid, user, group);
 
 	if (setgid(group) < 0)
@@ -107,9 +136,11 @@  static pid_t fork_child(int control_fd)
 
 	warnx("(pid=%d): Service running ...", pid);
 
-	warnx("(pid=%d): Unshare user namespace", pid);
-	if (unshare(CLONE_NEWUSER) < 0)
-		err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
+	if (child_args.mode == UM_UNSHARE) {
+		warnx("(pid=%d): Unshare user namespace", pid);
+		if (unshare(CLONE_NEWUSER) < 0)
+			err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
+	}
 
 	sync_notify(control_fd, UNSHARE);
 	sync_wait(control_fd, RLIMIT_RESTORE);
@@ -119,14 +150,30 @@  static pid_t fork_child(int control_fd)
 
 	warnx("(pid=%d): Executing real service ...", pid);
 
-	execve(service_prog, argv, envp);
+	execve(child_args.pathname, argv, envp);
 	err(EXIT_FAILURE, "(pid=%d): execve", pid);
 }
 
-static void run_service(void)
+static pid_t start_child(char *pathname, int control_fd)
+{
+	char *stack = malloc(STACK_SIZE);
+	int flags = child_args.mode == UM_CLONE_NEWUSER ? CLONE_NEWUSER : 0;
+	pid_t new_pid;
+
+	child_args.control_fd = control_fd;
+	child_args.pathname = pathname;
+
+	new_pid = clone(setup_and_exec, stack+STACK_SIZE-1, flags, NULL);
+	if (new_pid < 0)
+		err(EXIT_FAILURE, "clone");
+
+	free(stack);
+	close(control_fd);
+	return new_pid;
+}
+
+static void dump_context(size_t n_workers)
 {
-	size_t i;
-	int ret = EXIT_SUCCESS;
 	struct rlimit limit;
 	char user_ns[PATH_MAX];
 
@@ -135,44 +182,55 @@  static void run_service(void)
 	if (readlink("/proc/self/ns/user", user_ns, PATH_MAX) < 0)
 		err(EXIT_FAILURE, "(pid=%d) failed readlink", pid);
 
-	warnx("(pid=%d) Service instance attempts %i children, limit %lu:%lu, ns=%s",
-	      pid, THE_LIMIT, limit.rlim_cur, limit.rlim_max, user_ns);
+	warnx("(pid=%d) Service instance attempts %lu workers, limit %lu:%lu, ns=%s",
+	      pid, n_workers, limit.rlim_cur, limit.rlim_max, user_ns);
+}
+
+static int run_service(void)
+{
+	size_t i, n_workers = THE_LIMIT;
+	pid_t worker[NR_CHILDREN];
+	int ret = EXIT_SUCCESS;
 
-	/* test rlimit inside the service, effectively THE_LIMIT-1 becaue of service itself */
-	for (i = 0; i < THE_LIMIT; i++) {
-		child[i] = fork();
-		if (child[i] == 0) {
-			/* service child */
+	dump_context(n_workers);
+
+	/* test rlimit inside the service, last worker should fail because of service itself */
+	for (i = 0; i < n_workers; i++) {
+		worker[i] = fork();
+		if (worker[i] == 0) {
+			/* service worker */
 			pause();
 			exit(EXIT_SUCCESS);
 		}
-		if (child[i] < 0) {
+		if (worker[i] < 0) {
 			warnx("(pid=%d) service fork %lu failed, errno = %i", pid, i+1, errno);
-			if (!(i == THE_LIMIT-1 && errno == EAGAIN))
+			if (!(i == n_workers-1 && errno == EAGAIN))
 				ret = EXIT_FAILURE;
-		} else if (i == THE_LIMIT-1) {
+		} else if (i == n_workers-1) {
 			warnx("(pid=%d) RLIMIT_NPROC not honored", pid);
 			ret = EXIT_FAILURE;
 		}
 	}
 
 	/* service cleanup */
-	for (i = 0; i < THE_LIMIT; i++)
-		if (child[i] > 0)
-			kill(child[i], SIGUSR1);
+	for (i = 0; i < n_workers; i++)
+		if (worker[i] > 0)
+			kill(worker[i], SIGUSR1);
 
-	for (i = 0; i < THE_LIMIT; i++)
-		if (child[i] > 0)
-			waitpid(child[i], NULL, WNOHANG);
+	for (i = 0; i < n_workers; i++)
+		if (worker[i] > 0)
+			waitpid(worker[i], NULL, WNOHANG);
 
 	if (ret)
-		exit(ret);
+		return ret;
 	pause();
+	return EXIT_FAILURE;
 }
 
 int main(int argc, char **argv)
 {
 	size_t i;
+	pid_t child[NR_CHILDREN];
 	int control_fd[NR_CHILDREN];
 	int wstatus[NR_CHILDREN];
 	int children = NR_CHILDREN;
@@ -180,12 +238,11 @@  int main(int argc, char **argv)
 
 	pid = getpid();
 
-	if (getenv("I_AM_SERVICE")) {
-		run_service();
-		exit(EXIT_FAILURE);
-	}
+	if (getenv("I_AM_SERVICE"))
+		return run_service();
 
-	service_prog = argv[0];
+	if (argc > 1 && *argv[1] == 'c')
+		child_args.mode = UM_CLONE_NEWUSER;
 
 	warnx("(pid=%d) Starting testcase", pid);
 
@@ -194,8 +251,12 @@  int main(int argc, char **argv)
 		if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0, sockets) < 0)
 			err(EXIT_FAILURE, "(pid=%d) socketpair failed", pid);
 		control_fd[i] = sockets[0];
-		child[i] = fork_child(sockets[1]);
+		child[i] = start_child(argv[0], sockets[1]);
 		wstatus[i] = 0;
+
+		if (define_maps(child[i]) < 0)
+			err(EXIT_FAILURE, "(pid=%d) user_ns maps definition failed", pid);
+		sync_notify(control_fd[i], MAP_DEFINE);
 	}
 
 	for (i = 0; i < NR_CHILDREN; i++)