diff mbox series

[v7,2/4] elf: Do not change stack permission on dlopen/dlmopen

Message ID 20241226175834.2531046-3-adhemerval.zanella@linaro.org
State Accepted
Commit 0ca8785a28515291d4ef074b5b6cfb27434c1d2b
Headers show
Series Improve executable stack handling | expand

Commit Message

Adhemerval Zanella Dec. 26, 2024, 5:57 p.m. UTC
If some shared library loaded with dlopen/dlmopen requires an executable
stack, either implicitly because of a missing GNU_STACK ELF header
(where the ABI default flags implies in the executable bit) or explicitly
because of the executable bit from GNU_STACK; the loader will try to set
the both the main thread and all thread stacks (from the pthread cache)
as executable.

Besides the issue where any __nptl_change_stack_perm failure does not
undo the previous executable transition (meaning that if the library
fails to load, there can be thread stacks with executable stacks), this
behavior was used on recent CVE [1] as a vector for RCE.

This patch changes that if a shared library requires an executable
stack, and the current stack is not executable, dlopen fails.  The
change is done only for dynamically loaded modules, if the program
or any dependency requires an executable stack, the loader will still
change the main thread before program execution and any thread created
with default stack configuration.

[1] https://www.qualys.com/2023/07/19/cve-2023-38408/rce-openssh-forwarded-ssh-agent.txt

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 NEWS                                   |  6 +++
 elf/dl-load.c                          | 13 ++---
 elf/dl-support.c                       |  4 --
 elf/rtld.c                             |  6 ---
 elf/tst-execstack.c                    | 62 +++++++++++++++---------
 nptl/allocatestack.c                   | 19 --------
 sysdeps/generic/ldsodefs.h             | 22 ++-------
 sysdeps/mach/hurd/Makefile             |  2 +
 sysdeps/mach/hurd/dl-execstack.c       |  1 -
 sysdeps/nptl/pthreadP.h                |  6 ---
 sysdeps/unix/sysv/linux/Versions       |  3 --
 sysdeps/unix/sysv/linux/dl-execstack.c | 67 +-------------------------
 sysdeps/unix/sysv/linux/mips/Makefile  |  7 +++
 13 files changed, 66 insertions(+), 152 deletions(-)

Comments

Florian Weimer Dec. 30, 2024, 5:14 p.m. UTC | #1
* Adhemerval Zanella:

> If some shared library loaded with dlopen/dlmopen requires an executable
> stack, either implicitly because of a missing GNU_STACK ELF header
> (where the ABI default flags implies in the executable bit) or explicitly
> because of the executable bit from GNU_STACK; the loader will try to set
> the both the main thread and all thread stacks (from the pthread cache)
> as executable.
>
> Besides the issue where any __nptl_change_stack_perm failure does not
> undo the previous executable transition (meaning that if the library
> fails to load, there can be thread stacks with executable stacks), this
> behavior was used on recent CVE [1] as a vector for RCE.
>
> This patch changes that if a shared library requires an executable
> stack, and the current stack is not executable, dlopen fails.  The
> change is done only for dynamically loaded modules, if the program
> or any dependency requires an executable stack, the loader will still
> change the main thread before program execution and any thread created
> with default stack configuration.
>
> [1] https://www.qualys.com/2023/07/19/cve-2023-38408/rce-openssh-forwarded-ssh-agent.txt

I wouldn't call the proof-of-concept exploit “recent” at this point.

> +* dlopen and dlmopen no longer make the stack executable if a shared
> +  library requires it, either implicitly because of a missing GNU_STACK ELF
> +  header (and default ABI permission having the executable bit set) or
> +  explicitly because of the executable bit in GNU_STACK, and the stack is
> +  not already executable.  Instead, loading such objects will fail.

Okay.

> diff --git a/elf/tst-execstack.c b/elf/tst-execstack.c
> index 509149ad37..4679a9daca 100644
> --- a/elf/tst-execstack.c
> +++ b/elf/tst-execstack.c
> @@ -23,16 +23,33 @@
>  #include <stackinfo.h>
>  #include <stdbool.h>
>  #include <string.h>
> +#include <stdlib.h>
>  #include <support/xdlfcn.h>
>  #include <support/xthread.h>
>  #include <support/check.h>
>  #include <support/xstdio.h>
>  
> -static void deeper (void (*f) (void));
> +/* The DEFAULT_RWX_STACK controls whether the toolchain enables an executable
> +   stack for the testcase (which does not contain features that might require
> +   an executable stack, such as nested function).
> +   Some ABIs do require an executable stack, even if the toolchain supports
> +   non-executable stack.  In this cases the DEFAULT_RWX_STACK can be
> +   overrided.  */

“overridden”, I think.

Rest looks okay.

Reviewed-by: Florian Weimer <fweimer@redhat.com>

Thanks,
Florian
Adhemerval Zanella Dec. 30, 2024, 7:43 p.m. UTC | #2
On 30/12/24 14:14, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> If some shared library loaded with dlopen/dlmopen requires an executable
>> stack, either implicitly because of a missing GNU_STACK ELF header
>> (where the ABI default flags implies in the executable bit) or explicitly
>> because of the executable bit from GNU_STACK; the loader will try to set
>> the both the main thread and all thread stacks (from the pthread cache)
>> as executable.
>>
>> Besides the issue where any __nptl_change_stack_perm failure does not
>> undo the previous executable transition (meaning that if the library
>> fails to load, there can be thread stacks with executable stacks), this
>> behavior was used on recent CVE [1] as a vector for RCE.
>>
>> This patch changes that if a shared library requires an executable
>> stack, and the current stack is not executable, dlopen fails.  The
>> change is done only for dynamically loaded modules, if the program
>> or any dependency requires an executable stack, the loader will still
>> change the main thread before program execution and any thread created
>> with default stack configuration.
>>
>> [1] https://www.qualys.com/2023/07/19/cve-2023-38408/rce-openssh-forwarded-ssh-agent.txt
> 
> I wouldn't call the proof-of-concept exploit “recent” at this point.

Ack, I removed the 'recent' from commit message.

> 
>> +* dlopen and dlmopen no longer make the stack executable if a shared
>> +  library requires it, either implicitly because of a missing GNU_STACK ELF
>> +  header (and default ABI permission having the executable bit set) or
>> +  explicitly because of the executable bit in GNU_STACK, and the stack is
>> +  not already executable.  Instead, loading such objects will fail.
> 
> Okay.
> 
>> diff --git a/elf/tst-execstack.c b/elf/tst-execstack.c
>> index 509149ad37..4679a9daca 100644
>> --- a/elf/tst-execstack.c
>> +++ b/elf/tst-execstack.c
>> @@ -23,16 +23,33 @@
>>  #include <stackinfo.h>
>>  #include <stdbool.h>
>>  #include <string.h>
>> +#include <stdlib.h>
>>  #include <support/xdlfcn.h>
>>  #include <support/xthread.h>
>>  #include <support/check.h>
>>  #include <support/xstdio.h>
>>  
>> -static void deeper (void (*f) (void));
>> +/* The DEFAULT_RWX_STACK controls whether the toolchain enables an executable
>> +   stack for the testcase (which does not contain features that might require
>> +   an executable stack, such as nested function).
>> +   Some ABIs do require an executable stack, even if the toolchain supports
>> +   non-executable stack.  In this cases the DEFAULT_RWX_STACK can be
>> +   overrided.  */
> 
> “overridden”, I think.

Ack.

> 
> Rest looks okay.
> 
> Reviewed-by: Florian Weimer <fweimer@redhat.com>
> 
> Thanks,
> Florian
>
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index 4ceecd6249..e7975384ff 100644
--- a/NEWS
+++ b/NEWS
@@ -68,6 +68,12 @@  Deprecated and removed features, and other changes affecting compatibility:
 
 * The nios2*-*-linux-gnu configurations are no longer supported.
 
+* dlopen and dlmopen no longer make the stack executable if a shared
+  library requires it, either implicitly because of a missing GNU_STACK ELF
+  header (and default ABI permission having the executable bit set) or
+  explicitly because of the executable bit in GNU_STACK, and the stack is
+  not already executable.  Instead, loading such objects will fail.
+
 Changes to build and runtime requirements:
 
 * On recent Linux kernels with vDSO getrandom support, getrandom does not
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 284857ddf6..a238ff4286 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1315,12 +1315,13 @@  _dl_map_object_from_fd (const char *name, const char *origname, int fd,
   if (__glibc_unlikely ((stack_flags &~ GL(dl_stack_flags)) & PF_X))
     {
       /* The stack is presently not executable, but this module
-	 requires that it be executable.  */
-#if PTHREAD_IN_LIBC
-      errval = _dl_make_stacks_executable (stack_endp);
-#else
-      errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
-#endif
+	 requires that it be executable.  Only tries to change the
+	 stack protection during process startup.  */
+      if ((mode & __RTLD_DLOPEN) == 0)
+	errval = _dl_make_stack_executable (stack_endp);
+      else
+	errval = EINVAL;
+
       if (errval)
 	{
 	  errstring = N_("\
diff --git a/elf/dl-support.c b/elf/dl-support.c
index ee590edf93..fe1f8c8f6a 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -178,10 +178,6 @@  size_t _dl_stack_cache_actsize;
 uintptr_t _dl_in_flight_stack;
 int _dl_stack_cache_lock;
 #else
-/* If loading a shared object requires that we make the stack executable
-   when it was not, we do it by calling this function.
-   It returns an errno code or zero on success.  */
-int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
 void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls;
 #endif
 struct dl_scope_free_list *_dl_scope_free_list;
diff --git a/elf/rtld.c b/elf/rtld.c
index 0637c53017..5eb130be30 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1336,12 +1336,6 @@  dl_main (const ElfW(Phdr) *phdr,
 
   __tls_pre_init_tp ();
 
-#if !PTHREAD_IN_LIBC
-  /* The explicit initialization here is cheaper than processing the reloc
-     in the _rtld_local definition's initializer.  */
-  GL(dl_make_stack_executable_hook) = &_dl_make_stack_executable;
-#endif
-
   /* Process the environment variable which control the behaviour.  */
   skip_env = process_envvars (&state);
 
diff --git a/elf/tst-execstack.c b/elf/tst-execstack.c
index 509149ad37..4679a9daca 100644
--- a/elf/tst-execstack.c
+++ b/elf/tst-execstack.c
@@ -23,16 +23,33 @@ 
 #include <stackinfo.h>
 #include <stdbool.h>
 #include <string.h>
+#include <stdlib.h>
 #include <support/xdlfcn.h>
 #include <support/xthread.h>
 #include <support/check.h>
 #include <support/xstdio.h>
 
-static void deeper (void (*f) (void));
+/* The DEFAULT_RWX_STACK controls whether the toolchain enables an executable
+   stack for the testcase (which does not contain features that might require
+   an executable stack, such as nested function).
+   Some ABIs do require an executable stack, even if the toolchain supports
+   non-executable stack.  In this cases the DEFAULT_RWX_STACK can be
+   overrided.  */
+#ifndef DEFAULT_RWX_STACK
+# define DEFAULT_RWX_STACK 0
+#else
+static void
+deeper (void (*f) (void))
+{
+  char stack[1100 * 1024];
+  explicit_bzero (stack, sizeof stack);
+  (*f) ();
+  memfrob (stack, sizeof stack);
+}
+#endif
 
 #if USE_PTHREADS
-# include <pthread.h>
-
+# if DEFAULT_RWX_STACK
 static void *
 tryme_thread (void *f)
 {
@@ -40,16 +57,21 @@  tryme_thread (void *f)
 
   return 0;
 }
+# endif
 
 static pthread_barrier_t startup_barrier, go_barrier;
 static void *
 waiter_thread (void *arg)
 {
-  void **f = arg;
   xpthread_barrier_wait (&startup_barrier);
   xpthread_barrier_wait (&go_barrier);
 
+# if DEFAULT_RWX_STACK
+  void **f = arg;
   (*((void (*) (void)) *f)) ();
+# else
+  abort ();
+# endif
 
   return 0;
 }
@@ -117,7 +139,9 @@  do_test (void)
 
   printf ("executable stacks %sallowed\n", allow_execstack ? "" : "not ");
 
+#if USE_PTHREADS || DEFAULT_RWX_STACK
   static void *f;		/* Address of this is used in other threads. */
+#endif
 
 #if USE_PTHREADS
   /* Create some threads while stacks are nonexecutable.  */
@@ -134,7 +158,7 @@  do_test (void)
   puts ("threads waiting");
 #endif
 
-#if USE_PTHREADS
+#if USE_PTHREADS && DEFAULT_RWX_STACK
   void *old_stack_addr, *new_stack_addr;
   size_t stack_size;
   pthread_t me = pthread_self ();
@@ -156,11 +180,10 @@  do_test (void)
   const char *soname = "tst-execstack-mod.so";
 #endif
   void *h = dlopen (soname, RTLD_LAZY);
-  if (h == NULL)
-    {
-      printf ("cannot load: %s\n", dlerror ());
-      return allow_execstack;
-    }
+#if !DEFAULT_RWX_STACK
+  TEST_VERIFY_EXIT (h == NULL);
+#else
+  TEST_VERIFY_EXIT (h != NULL);
 
   f = xdlsym (h, "tryme");
 
@@ -176,9 +199,9 @@  do_test (void)
 
 # if _STACK_GROWS_DOWN
     new_stack_addr += stack_size;
-# else
+#  else
     new_stack_addr -= stack_size;
-# endif
+#  endif
 
   /* It is possible that the dlopen'd module may have been mmapped just below
      the stack.  The stack size is taken as MIN(stack rlimit size, end of last
@@ -190,12 +213,12 @@  do_test (void)
      should remain the same, which is computed as stackaddr + stacksize.  */
   TEST_VERIFY_EXIT (old_stack_addr == new_stack_addr);
   printf ("Stack address remains the same: %p\n", old_stack_addr);
-#endif
+# endif
 
   /* Test that growing the stack region gets new executable pages too.  */
   deeper ((void (*) (void)) f);
 
-#if USE_PTHREADS
+# if USE_PTHREADS
   /* Test that a fresh thread now gets an executable stack.  */
   xpthread_create (NULL, &tryme_thread, f);
 
@@ -205,19 +228,10 @@  do_test (void)
   xpthread_barrier_wait (&go_barrier);
 
   pthread_exit ((void *) (long int) (! allow_execstack));
+# endif
 #endif
 
   return ! allow_execstack;
 }
 
-static void
-deeper (void (*f) (void))
-{
-  char stack[1100 * 1024];
-  explicit_bzero (stack, sizeof stack);
-  (*f) ();
-  memfrob (stack, sizeof stack);
-}
-
-
 #include <support/test-driver.c>
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index d9adb5856c..9662b43afe 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -448,25 +448,6 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
 	  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-
-	  /* There might have been a race.  Another thread might have
-	     caused the stacks to get exec permission while this new
-	     stack was prepared.  Detect if this was possible and
-	     change the permission if necessary.  */
-	  if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
-				&& (prot & PROT_EXEC) == 0, 0))
-	    {
-	      int err = __nptl_change_stack_perm (pd);
-	      if (err != 0)
-		{
-		  /* Free the stack memory we just allocated.  */
-		  (void) __munmap (mem, size);
-
-		  return err;
-		}
-	    }
-
-
 	  /* Note that all of the stack and the thread descriptor is
 	     zeroed.  This means we do not have to initialize fields
 	     with initial value zero.  This is specifically true for
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index cec56e2214..172bcd2cf7 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -399,13 +399,6 @@  struct rtld_global
 #endif
 #include <dl-procruntime.c>
 
-#if !PTHREAD_IN_LIBC
-  /* If loading a shared object requires that we make the stack executable
-     when it was not, we do it by calling this function.
-     It returns an errno code or zero on success.  */
-  EXTERN int (*_dl_make_stack_executable_hook) (void **);
-#endif
-
   /* Prevailing state of the stack, PF_X indicating it's executable.  */
   EXTERN ElfW(Word) _dl_stack_flags;
 
@@ -702,17 +695,10 @@  extern const ElfW(Phdr) *_dl_phdr;
 extern size_t _dl_phnum;
 #endif
 
-#if PTHREAD_IN_LIBC
-/* This function changes the permissions of all stacks (not just those
-   of the main stack).  */
-int _dl_make_stacks_executable (void **stack_endp) attribute_hidden;
-#else
-/* This is the initial value of GL(dl_make_stack_executable_hook).
-   A threads library can change it.  The ld.so implementation changes
-   the permissions of the main stack only.  */
-extern int _dl_make_stack_executable (void **stack_endp);
-rtld_hidden_proto (_dl_make_stack_executable)
-#endif
+/* This function changes the permission of the memory region pointed
+   by STACK_ENDP to executable and update the internal memory protection
+   flags for future thread stack creation.  */
+int _dl_make_stack_executable (void **stack_endp) attribute_hidden;
 
 /* Variable pointing to the end of the stack (or close to it).  This value
    must be constant over the runtime of the application.  Some programs
diff --git a/sysdeps/mach/hurd/Makefile b/sysdeps/mach/hurd/Makefile
index 698729a8a6..576c42eb68 100644
--- a/sysdeps/mach/hurd/Makefile
+++ b/sysdeps/mach/hurd/Makefile
@@ -300,6 +300,8 @@  ifeq ($(subdir),elf)
 check-execstack-xfail += ld.so libc.so libpthread.so
 # We always create a thread for signals
 test-xfail-tst-single_threaded-pthread-static = yes
+
+CFLAGS-tst-execstack.c += -DDEFAULT_RWX_STACK=1
 endif
 
 # For bug 30166
diff --git a/sysdeps/mach/hurd/dl-execstack.c b/sysdeps/mach/hurd/dl-execstack.c
index 31371bc6e3..0222430131 100644
--- a/sysdeps/mach/hurd/dl-execstack.c
+++ b/sysdeps/mach/hurd/dl-execstack.c
@@ -47,4 +47,3 @@  _dl_make_stack_executable (void **stack_endp)
   return ENOSYS;
 #endif
 }
-rtld_hidden_def (_dl_make_stack_executable)
diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
index c2db165052..a8e09bf754 100644
--- a/sysdeps/nptl/pthreadP.h
+++ b/sysdeps/nptl/pthreadP.h
@@ -289,12 +289,6 @@  extern _Noreturn void __syscall_do_cancel (void) attribute_hidden;
 extern void __nptl_free_tcb (struct pthread *pd);
 libc_hidden_proto (__nptl_free_tcb)
 
-/* Change the permissions of a thread stack.  Called from
-   _dl_make_stacks_executable and pthread_create.  */
-int
-__nptl_change_stack_perm (struct pthread *pd);
-rtld_hidden_proto (__nptl_change_stack_perm)
-
 /* longjmp handling.  */
 extern void __pthread_cleanup_upto (__jmp_buf target, char *targetframe);
 libc_hidden_proto (__pthread_cleanup_upto)
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index 213ff5f1fe..55d565545a 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -360,7 +360,4 @@  ld {
     __rseq_offset;
     __rseq_size;
   }
-  GLIBC_PRIVATE {
-    __nptl_change_stack_perm;
-  }
 }
diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c
index b986898598..68db6737f0 100644
--- a/sysdeps/unix/sysv/linux/dl-execstack.c
+++ b/sysdeps/unix/sysv/linux/dl-execstack.c
@@ -16,19 +16,10 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <errno.h>
 #include <ldsodefs.h>
-#include <libintl.h>
-#include <list.h>
-#include <pthreadP.h>
-#include <stackinfo.h>
-#include <stdbool.h>
-#include <sys/mman.h>
-#include <sysdep.h>
-#include <unistd.h>
 
-static int
-make_main_stack_executable (void **stack_endp)
+int
+_dl_make_stack_executable (void **stack_endp)
 {
   /* This gives us the highest/lowest page that needs to be changed.  */
   uintptr_t page = ((uintptr_t) *stack_endp
@@ -52,57 +43,3 @@  make_main_stack_executable (void **stack_endp)
 
   return 0;
 }
-
-int
-_dl_make_stacks_executable (void **stack_endp)
-{
-  /* First the main thread's stack.  */
-  int err = make_main_stack_executable (stack_endp);
-  if (err != 0)
-    return err;
-
-  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-  list_t *runp;
-  list_for_each (runp, &GL (dl_stack_used))
-    {
-      err = __nptl_change_stack_perm (list_entry (runp, struct pthread, list));
-      if (err != 0)
-	break;
-    }
-
-  /* Also change the permission for the currently unused stacks.  This
-     might be wasted time but better spend it here than adding a check
-     in the fast path.  */
-  if (err == 0)
-    list_for_each (runp, &GL (dl_stack_cache))
-      {
-	err = __nptl_change_stack_perm (list_entry (runp, struct pthread,
-						    list));
-	if (err != 0)
-	  break;
-      }
-
-  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-  return err;
-}
-
-int
-__nptl_change_stack_perm (struct pthread *pd)
-{
-#if _STACK_GROWS_DOWN
-  void *stack = pd->stackblock + pd->guardsize;
-  size_t len = pd->stackblock_size - pd->guardsize;
-#elif _STACK_GROWS_UP
-  void *stack = pd->stackblock;
-  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
-#else
-# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
-#endif
-  if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
-    return errno;
-
-  return 0;
-}
-rtld_hidden_def (__nptl_change_stack_perm)
diff --git a/sysdeps/unix/sysv/linux/mips/Makefile b/sysdeps/unix/sysv/linux/mips/Makefile
index d5725c69d8..05ec9150b2 100644
--- a/sysdeps/unix/sysv/linux/mips/Makefile
+++ b/sysdeps/unix/sysv/linux/mips/Makefile
@@ -61,6 +61,7 @@  ifeq ($(subdir),elf)
 # this test is expected to fail.
 ifneq ($(mips-has-gnustack),yes)
 test-xfail-check-execstack = yes
+CFLAGS-tst-execstack.c += -DDEFAULT_RWX_STACK=1
 endif
 endif
 
@@ -68,6 +69,12 @@  ifeq ($(subdir),stdlib)
 gen-as-const-headers += ucontext_i.sym
 endif
 
+ifeq ($(subdir),nptl)
+ifeq ($(mips-force-execstack),yes)
+CFLAGS-tst-execstack-threads.c += -DDEFAULT_RWX_STACK=1
+endif
+endif
+
 ifeq ($(mips-force-execstack),yes)
 CFLAGS-.o += -Wa,-execstack
 CFLAGS-.os += -Wa,-execstack