[v3,1/2] posix: Use posix_spawn on popen

Message ID 20181025174103.31596-1-adhemerval.zanella@linaro.org
State New
Headers show
Series
  • [v3,1/2] posix: Use posix_spawn on popen
Related show

Commit Message

Adhemerval Zanella Oct. 25, 2018, 5:41 p.m.
This patch uses posix_spawn on popen instead of fork and execl.  On Linux
this has the advantage of much lower memory consumption (usually 32 Kb
minimum for the mmap stack area).

Two issues are also fixed with this change:

  * BZ#17490: although the POSIX pthread_atfork description only lists 'fork'
    as the function that should execute the atfork handlers, the popen
    description states that:

      '[...] shall be *as if* a child process were created within the popen()
       call using the fork() function [...]'

    Other libcs/systems seem to follow the idea that atfork handlers should
    not be executed for popen:

    libc/system	| run atfork handles   | notes
    ------------|----------------------|---------------------------------------
    Freebsd	|        no            | uses vfork
    Solaris 11	|        no            |
    MacOSX 11   |        no            | implemented through posix_spawn syscall
    ------------|----------------------|----------------------------------------

    Similar to posix_spawn and system, the idea of popen is to spawn a
    different binary, so the POSIX rationale for running the atfork handlers
    (avoiding internal process inconsistency) does not really apply, and
    running them might be unsafe in some cases.

  * BZ#22834: now that proc_file_chain is not copied on another process, it
    only needs to be accessed through the proc_file_chain_lock.

Checked on x86_64-linux-gnu and i686-linux-gnu.

	[BZ #22834]
	[BZ #17490]
	* NEWS: Add new semantic for atfork with popen and system.
	* libio/iopopen.c (_IO_new_proc_open): use posix_spawn instead of
	fork and execl.
---
 ChangeLog       |   6 +++
 NEWS            |   6 +++
 libio/iopopen.c | 116 +++++++++++++++++++++++++++++++-----------------
 3 files changed, 87 insertions(+), 41 deletions(-)

-- 
2.17.1

Comments

Adhemerval Zanella Nov. 27, 2018, 5:40 p.m. | #1
Ping.

On 25/10/2018 14:41, Adhemerval Zanella wrote:
> This patch uses posix_spawn on popen instead of fork and execl.  On Linux

> this has the advantage of much lower memory consumption (usually 32 Kb

> minimum for the mmap stack area).

> 

> Two issues are also fixed with this change:

> 

>   * BZ#17490: although POSIX pthread_atfork description only list 'fork'

>     as the function that should execute the atfork handlers, popen

>     description states that:

> 

>       '[...] shall be *as if* a child process were created within the popen()

>        call using the fork() function [...]'

> 

>     Other libc/system seems to follow the idea atfork handlers should not be

>     executed for popen:

> 

>     libc/system	| run atfork handles   | notes

>     ------------|----------------------|---------------------------------------

>     Freebsd	|        no            | uses vfork

>     Solaris 11	|        no            |

>     MacOSX 11   |        no            | implemented through posix_spawn syscall

>     ------------|----------------------|----------------------------------------

> 

>     Similar to posix_spawn and system, popen idea is to spawn a different

>     binary so all the POSIX rationale to run the atfork handlers to avoid

>     internal process inconsistency is not really required and in some cases

>     might be unsafe.

> 

>   * BZ#22834: now that proc_file_chain is not copied on another process, it

>     just require to access is through the proc_file_chain_lock.

> 

> Checked on x86_64-linux-gnu and i686-linux-gnu.

> 

> 	[BZ #22834]

> 	[BZ #17490]

> 	* NEWS: Add new semantic for atfork with popen and system.

> 	* libio/iopopen.c (_IO_new_proc_open): use posix_spawn instead of

> 	fork and execl.

> ---

>  ChangeLog       |   6 +++

>  NEWS            |   6 +++

>  libio/iopopen.c | 116 +++++++++++++++++++++++++++++++-----------------

>  3 files changed, 87 insertions(+), 41 deletions(-)

> 

> diff --git a/NEWS b/NEWS

> index f054dc0433..c76813d12c 100644

> --- a/NEWS

> +++ b/NEWS

> @@ -30,6 +30,12 @@ Major new features:

>    HTM state is saved and restore lazily (the state being saved even when the

>    process actually does not use HTM).

>  

> +* The popen and system do not run atfork handlers anymore (BZ#17490).

> +  Although it is a possible POSIX violation, the POSIX rationale in

> +  pthread_atfork documentation regarding atfork handlers is to handle

> +  incosistent mutex state after fork call in multithread environment.

> +  In both popen and system there is no direct access to user-defined mutexes.

> +

>  Deprecated and removed features, and other changes affecting compatibility:

>  

>  * The glibc.tune tunable namespace has been renamed to glibc.cpu and the

> diff --git a/libio/iopopen.c b/libio/iopopen.c

> index 2eff45b4c8..da24e60eef 100644

> --- a/libio/iopopen.c

> +++ b/libio/iopopen.c

> @@ -34,7 +34,8 @@

>  #include <not-cancel.h>

>  #include <sys/types.h>

>  #include <sys/wait.h>

> -#include <kernel-features.h>

> +#include <spawn.h>

> +#include <paths.h>

>  

>  struct _IO_proc_file

>  {

> @@ -64,8 +65,8 @@ _IO_new_proc_open (FILE *fp, const char *command, const char *mode)

>  {

>    int read_or_write;

>    int parent_end, child_end;

> +  int child_std_end;

>    int pipe_fds[2];

> -  pid_t child_pid;

>  

>    int do_read = 0;

>    int do_write = 0;

> @@ -108,59 +109,92 @@ _IO_new_proc_open (FILE *fp, const char *command, const char *mode)

>  

>    if (do_read)

>      {

> -      parent_end = pipe_fds[0];

> -      child_end = pipe_fds[1];

> +      parent_end = 0;

> +      child_end = 1;

>        read_or_write = _IO_NO_WRITES;

> +      child_std_end = 1;

>      }

>    else

>      {

> -      parent_end = pipe_fds[1];

> -      child_end = pipe_fds[0];

> +      parent_end = 1;

> +      child_end = 0;

>        read_or_write = _IO_NO_READS;

> +      child_std_end = 0;

>      }

>  

> -  ((_IO_proc_file *) fp)->pid = child_pid = __fork ();

> -  if (child_pid == 0)

> -    {

> -      int child_std_end = do_read ? 1 : 0;

> -      struct _IO_proc_file *p;

> -

> -      if (child_end != child_std_end)

> -	__dup2 (child_end, child_std_end);

> -      else

> -	/* The descriptor is already the one we will use.  But it must

> -	   not be marked close-on-exec.  Undo the effects.  */

> -	__fcntl (child_end, F_SETFD, 0);

> -      /* POSIX.2:  "popen() shall ensure that any streams from previous

> -         popen() calls that remain open in the parent process are closed

> -	 in the new child process." */

> -      for (p = proc_file_chain; p; p = p->next)

> -	{

> -	  int fd = _IO_fileno ((FILE *) p);

> +  {

> +    posix_spawn_file_actions_t fa;

> +    /* posix_spawn_file_actions_init does not fail.  */

> +    __posix_spawn_file_actions_init (&fa);

>  

> -	  /* If any stream from previous popen() calls has fileno

> -	     child_std_end, it has been already closed by the dup2 syscall

> -	     above.  */

> -	  if (fd != child_std_end)

> -	    __close_nocancel (fd);

> -	}

> +    /* The descriptor is already the one the child will use.  In this case

> +       it must be moved to another one otherwise, there is no safe way to

> +       remove the close-on-exec flag in the child without creating a FD leak

> +       race in the parent.  */

> +    if (pipe_fds[child_end] == child_std_end)

> +      {

> +	int tmp = __fcntl (child_std_end, F_DUPFD_CLOEXEC, 0);

> +	if (tmp < 0)

> +	  goto spawn_failure;

> +	__close_nocancel (pipe_fds[child_end]);

> +	pipe_fds[child_end] = tmp;

> +      }

>  

> -      execl ("/bin/sh", "sh", "-c", command, (char *) 0);

> -      _exit (127);

> -    }

> -  __close_nocancel (child_end);

> -  if (child_pid < 0)

> -    {

> -      __close_nocancel (parent_end);

> -      return NULL;

> -    }

> +    if (__posix_spawn_file_actions_adddup2 (&fa, pipe_fds[child_end],

> +	child_std_end) != 0)

> +      goto spawn_failure;

> +

> +    /* POSIX.2: "popen() shall ensure that any streams from previous popen()

> +       calls that remain open in the parent process are closed in the new

> +       child process." */

> +    bool addclose_failure = false;

> +#ifdef _IO_MTSAFE_IO

> +    _IO_cleanup_region_start_noarg (unlock);

> +    _IO_lock_lock (proc_file_chain_lock);

> +#endif

> +    for (struct _IO_proc_file *p = proc_file_chain; p; p = p->next)

> +      {

> +	int fd = _IO_fileno ((FILE *) p);

> +

> +	/* If any stream from previous popen() calls has fileno

> +	   child_send, it has been already closed by the dup2 syscall

> +	   above.  */

> +	if (fd != child_std_end

> +	    && __posix_spawn_file_actions_addclose (&fa, fd) != 0)

> +	  {

> +	    addclose_failure = true;

> +	    break;

> +	  }

> +      }

> +#ifdef _IO_MTSAFE_IO

> +     _IO_lock_unlock (proc_file_chain_lock);

> +     _IO_cleanup_region_end (0);

> +#endif

> +    if (addclose_failure)

> +      goto spawn_failure;

> +

> +    if (__posix_spawn (&((_IO_proc_file *) fp)->pid, _PATH_BSHELL, &fa, 0,

> +		     (char *const[]){ (char*) "sh", (char*) "-c",

> +		     (char *) command, NULL }, __environ) != 0)

> +      {

> +      spawn_failure:

> +	__posix_spawn_file_actions_destroy (&fa);

> +	__close_nocancel (pipe_fds[child_end]);

> +	__close_nocancel (pipe_fds[parent_end]);

> +	__set_errno (ENOMEM);

> +	return NULL;

> +      }

> +

> +    __posix_spawn_file_actions_destroy (&fa);

> +  }

> +  __close_nocancel (pipe_fds[child_end]);

>  

>    if (!do_cloexec)

>      /* Undo the effects of the pipe2 call which set the

>         close-on-exec flag.  */

> -    __fcntl (parent_end, F_SETFD, 0);

> +    __fcntl (pipe_fds[parent_end], F_SETFD, 0);

>  

> -  _IO_fileno (fp) = parent_end;

> +  _IO_fileno (fp) = pipe_fds[parent_end];

>  

>    /* Link into proc_file_chain. */

>  #ifdef _IO_MTSAFE_IO

>
Florian Weimer Nov. 28, 2018, 3:47 p.m. | #2
* Adhemerval Zanella:

>   * BZ#22834: now that proc_file_chain is not copied on another process, it

>     just require to access is through the proc_file_chain_lock.


Sorry, what does that mean? (“copied on another process” in particular.)

> +* The popen and system do not run atfork handlers anymore (BZ#17490).

> +  Although it is a possible POSIX violation, the POSIX rationale in

> +  pthread_atfork documentation regarding atfork handlers is to handle

> +  incosistent mutex state after fork call in multithread environment.

> +  In both popen and system there is no direct access to user-defined mutexes.

> +

>  Deprecated and removed features, and other changes affecting compatibility:

>  

>  * The glibc.tune tunable namespace has been renamed to glibc.cpu and the

> diff --git a/libio/iopopen.c b/libio/iopopen.c

> index 2eff45b4c8..da24e60eef 100644

> --- a/libio/iopopen.c

> +++ b/libio/iopopen.c

> @@ -34,7 +34,8 @@

>  #include <not-cancel.h>

>  #include <sys/types.h>

>  #include <sys/wait.h>

> -#include <kernel-features.h>

> +#include <spawn.h>

> +#include <paths.h>

>  

>  struct _IO_proc_file

>  {

> @@ -64,8 +65,8 @@ _IO_new_proc_open (FILE *fp, const char *command, const char *mode)

>  {

>    int read_or_write;

>    int parent_end, child_end;

> +  int child_std_end;


Should this be child_pipe_fd or something more descriptive?  Perhaps
also add a comment to the previous line that these are indexes into
pipe_fds below?

> +    if (__posix_spawn_file_actions_adddup2 (&fa, pipe_fds[child_end],

> +	child_std_end) != 0)

> +      goto spawn_failure;

> +

> +    /* POSIX.2: "popen() shall ensure that any streams from previous popen()

> +       calls that remain open in the parent process are closed in the new

> +       child process." */

> +    bool addclose_failure = false;

> +#ifdef _IO_MTSAFE_IO

> +    _IO_cleanup_region_start_noarg (unlock);

> +    _IO_lock_lock (proc_file_chain_lock);

> +#endif

> +    for (struct _IO_proc_file *p = proc_file_chain; p; p = p->next)

> +      {

> +	int fd = _IO_fileno ((FILE *) p);

> +

> +	/* If any stream from previous popen() calls has fileno

> +	   child_send, it has been already closed by the dup2 syscall

> +	   above.  */


Typo: child_send.  But I'm not sure how this can happen.  Shouldn't the
descriptor still be open if the stream is linked into the chain?

If not, that doesn't really fix the race.

Rest looks okay to me.

Thanks,
Florian
Adhemerval Zanella Nov. 28, 2018, 7:27 p.m. | #3
On 28/11/2018 13:47, Florian Weimer wrote:
> * Adhemerval Zanella:

> 

>>   * BZ#22834: now that proc_file_chain is not copied on another process, it

>>     just require to access is through the proc_file_chain_lock.

> 

> Sorry, what does that mean? (“copied on another process” in particular.)


What I mean is that, with posix_spawn, the scenario described in BZ#22834
(where the forked process might contain invalid internal state when updating
proc_file_chain) should not happen, because the helper process used by
posix_spawn does not access proc_file_chain.  The wording I used could indeed
be improved; what about:

  * BZ#22834: the described scenario, where the forked process might access
    invalid memory due to an inconsistent state in multithread environment,
    should not happen because posix_spawn does not access the affected
    data structure (proc_file_chain).

> 

>> +* The popen and system do not run atfork handlers anymore (BZ#17490).

>> +  Although it is a possible POSIX violation, the POSIX rationale in

>> +  pthread_atfork documentation regarding atfork handlers is to handle

>> +  incosistent mutex state after fork call in multithread environment.

>> +  In both popen and system there is no direct access to user-defined mutexes.

>> +

>>  Deprecated and removed features, and other changes affecting compatibility:

>>  

>>  * The glibc.tune tunable namespace has been renamed to glibc.cpu and the

>> diff --git a/libio/iopopen.c b/libio/iopopen.c

>> index 2eff45b4c8..da24e60eef 100644

>> --- a/libio/iopopen.c

>> +++ b/libio/iopopen.c

>> @@ -34,7 +34,8 @@

>>  #include <not-cancel.h>

>>  #include <sys/types.h>

>>  #include <sys/wait.h>

>> -#include <kernel-features.h>

>> +#include <spawn.h>

>> +#include <paths.h>

>>  

>>  struct _IO_proc_file

>>  {

>> @@ -64,8 +65,8 @@ _IO_new_proc_open (FILE *fp, const char *command, const char *mode)

>>  {

>>    int read_or_write;

>>    int parent_end, child_end;

>> +  int child_std_end;

> 

> Should this be child_pipe_fd or something more descriptive?  Perhaps

> also add a comment to the previous line that these are indexes into

> pipe_fds below?


I don't have a strong opinion here; I renamed child_std_end to child_pipe_fd
and added a comment stating that parent_end and child_end are both indexes.

> 

>> +    if (__posix_spawn_file_actions_adddup2 (&fa, pipe_fds[child_end],

>> +	child_std_end) != 0)

>> +      goto spawn_failure;

>> +

>> +    /* POSIX.2: "popen() shall ensure that any streams from previous popen()

>> +       calls that remain open in the parent process are closed in the new

>> +       child process." */

>> +    bool addclose_failure = false;

>> +#ifdef _IO_MTSAFE_IO

>> +    _IO_cleanup_region_start_noarg (unlock);

>> +    _IO_lock_lock (proc_file_chain_lock);

>> +#endif

>> +    for (struct _IO_proc_file *p = proc_file_chain; p; p = p->next)

>> +      {

>> +	int fd = _IO_fileno ((FILE *) p);

>> +

>> +	/* If any stream from previous popen() calls has fileno

>> +	   child_send, it has been already closed by the dup2 syscall

>> +	   above.  */

> 

> Typo: child_send.  But I'm not sure how this can happen.  Shouldn't the

> descriptor still be open if the stream is linked into the chain?


Ack.

> 

> If not, that doesn't really fix the race.


This originally came from a RH issue [1] where the program:

---
#include <assert.h>
#include <stdio.h>

int main(void)
{
  FILE *p1, *p2;
  int result1, result2;
  
  fclose(stdout);
  p1 = popen("echo a", "r"); assert(p1 != NULL);
  p2 = popen("echo b", "r"); assert(p2 != NULL);
  result1 = pclose(p1); result2 = pclose(p2);
  fprintf(stderr, "result1 = %d, result2 = %d\n", result1, result2);
  return 0;
}
---

The issue is that the adddup2 action will dup2 the child end of the pipe to
child_pipe_fd (in this case '1', since the program has closed stdout), so
adding another action to close that descriptor is wrong.

[1] https://bugzilla.redhat.com/show_bug.cgi?id=248281

> 

> Rest looks okay to me.


Ok to push with the modifications above?

> 

> Thanks,

> Florian
Florian Weimer Nov. 28, 2018, 7:51 p.m. | #4
* Adhemerval Zanella:

> On 28/11/2018 13:47, Florian Weimer wrote:

>> * Adhemerval Zanella:

>> 

>>>   * BZ#22834: now that proc_file_chain is not copied on another process, it

>>>     just require to access is through the proc_file_chain_lock.

>> 

>> Sorry, what does that mean? (“copied on another process” in particular.)

>

> What I mean is with posix_spawn the scenario described in BZ#22834, where the

> forked process might contain invalid internal state when updating

> proc_file_chain should not happen because helper process used by

> posix_spawn does not access proc_file_chain.  The wording I used indeed

> might be improved, what about:

>

>   * BZ#22834: the described scenario, where the forked process might access

>     invalid memory due an inconsistent state in multithread environment,

>     should not happen because posix_spawn does not access the affected

>     data structure (proc_file_chain).


“multithreaded environment”

Much clearer, thanks.
>>> +	/* If any stream from previous popen() calls has fileno

>>> +	   child_send, it has been already closed by the dup2 syscall

>>> +	   above.  */

>> 

>> Typo: child_send.  But I'm not sure how this can happen.  Shouldn't the

>> descriptor still be open if the stream is linked into the chain?

>

> Ack.

>

>> 

>> If not, that doesn't really fix the race.

>

> This originally came from a RH issue [1] where the program:

>

> ---

> #include <assert.h>

> #include <stdio.h>

>

> int main(void)

> {

>   FILE *p1, *p2;

>   int result1, result2;

>   

>   fclose(stdout);

>   p1 = popen("echo a", "r"); assert(p1 != NULL);

>   p2 = popen("echo b", "r"); assert(p2 != NULL);

>   result1 = pclose(p1); result2 = pclose(p2);

>   fprintf(stderr, "result1 = %d, result2 = %d\n", result1, result2);

>   return 0;

> }

> ---

>

> The issue is adddup2 action will dup2 the pipe child end to child_pipe_fd

> (in this case '1' since it has closed stdout), so adding another action to

> close is wrong.

>

> [1] https://bugzilla.redhat.com/show_bug.cgi?id=248281


Okay, thanks for providing the reference.

I think the fix is incorrect in the sense that POSIX is wrong here.  As
specified, popen can launch the new subprocess with a closed standard
input/output/error if it came from popen in the parent process, which is
wrong.  I believe the actual check should be against fd <= 2, not fd !=
child_std_end.  But this is a separate discussion, not related to this
patch.

But looking at this, I realized that we have a race condition due to the
use of proc_file_chain_lock: A concurrent call to popen may add a
descriptor to the chain after the list traversal and before the
posix_spawn call.  Or the descriptor might now be something else because
pclose was called concurrently.  The old code did not have this
particular problem because after the fork, the set of open descriptors
was stable.

Do you think it would be acceptable to extend the scope of the critical
section to include the posix_spawn call?  This obviously reduces
concurrency somewhat.  I don't think there is a lock ordering issue,
though.

Thanks,
Florian
Adhemerval Zanella Nov. 28, 2018, 9:50 p.m. | #5
On 28/11/2018 17:51, Florian Weimer wrote:
> * Adhemerval Zanella:

> 

>> On 28/11/2018 13:47, Florian Weimer wrote:

>>> * Adhemerval Zanella:

>>>

>>>>   * BZ#22834: now that proc_file_chain is not copied on another process, it

>>>>     just require to access is through the proc_file_chain_lock.

>>>

>>> Sorry, what does that mean? (“copied on another process” in particular.)

>>

>> What I mean is with posix_spawn the scenario described in BZ#22834, where the

>> forked process might contain invalid internal state when updating

>> proc_file_chain should not happen because helper process used by

>> posix_spawn does not access proc_file_chain.  The wording I used indeed

>> might be improved, what about:

>>

>>   * BZ#22834: the described scenario, where the forked process might access

>>     invalid memory due an inconsistent state in multithread environment,

>>     should not happen because posix_spawn does not access the affected

>>     data structure (proc_file_chain).

> 

> “multithreaded environment”


Ack.

> 

> Much clearer, thanks.

>>>> +	/* If any stream from previous popen() calls has fileno

>>>> +	   child_send, it has been already closed by the dup2 syscall

>>>> +	   above.  */

>>>

>>> Typo: child_send.  But I'm not sure how this can happen.  Shouldn't the

>>> descriptor still be open if the stream is linked into the chain?

>>

>> Ack.

>>

>>>

>>> If not, that doesn't really fix the race.

>>

>> This originally came from a RH issue [1] where the program:

>>

>> ---

>> #include <assert.h>

>> #include <stdio.h>

>>

>> int main(void)

>> {

>>   FILE *p1, *p2;

>>   int result1, result2;

>>   

>>   fclose(stdout);

>>   p1 = popen("echo a", "r"); assert(p1 != NULL);

>>   p2 = popen("echo b", "r"); assert(p2 != NULL);

>>   result1 = pclose(p1); result2 = pclose(p2);

>>   fprintf(stderr, "result1 = %d, result2 = %d\n", result1, result2);

>>   return 0;

>> }

>> ---

>>

>> The issue is adddup2 action will dup2 the pipe child end to child_pipe_fd

>> (in this case '1' since it has closed stdout), so adding another action to

>> close is wrong.

>>

>> [1] https://bugzilla.redhat.com/show_bug.cgi?id=248281

> 

> Okay, thanks for providing the reference.

> 

> It think the fix is incorrect in the sense that POSIX is wrong here.  As

> specified, popen can launch the new subprocess with a close standard

> input/output/error if it came from popen in the parent process, which is

> wrong.  I believe the actual check should be against fd <= 2, not fd !=

> child_std_end.  But this is a separate discussion, not related to this

> patch.

> 


Do you mean our interpretation of POSIX or a POSIX defect? 

> But looking at this, I realized that we have a race condition due to the

> use of proc_file_chain_lock: A concurrent call to popen may add a

> descriptor to the chain after the list traversal and before the

> posix_spawn call.  Or the descriptor might now be something else because

> pclose was called concurrently.  The old code did not have this

> particular problem because after the fork, the set of open descriptors

> was stable.

> 

> Do you thin kit would be acceptable to extend the scope of the critical

> section to include the posix_spawn call?  This obviously reduces

> concurrency somewhat.  I don't think there is a lock ordering issue,

> though.

> 

You are right, the concurrent traversal and update of proc_file_chain
leads to a race condition. However, I also think it would be a gain to
extend the scope of the critical section to include posix_spawn. At least on
Linux, posix_spawn is noticeably faster and more scalable than fork plus
execve, especially with large resident memory sets.

Below is an updated patch with the updated commit message. 

---

This patch uses posix_spawn on popen instead of fork and execl.  On Linux
this has the advantage of much lower memory consumption (usually 32 Kb
minimum for the mmap stack area).

Two issues are also fixed with this change:

  * BZ#17490: although the POSIX pthread_atfork description only lists 'fork'
    as the function that should execute the atfork handlers, the popen
    description states that:

      '[...] shall be *as if* a child process were created within the popen()
       call using the fork() function [...]'

    Other libcs/systems seem to follow the idea that atfork handlers should
    not be executed for popen:

    libc/system	| run atfork handles   | notes
    ------------|----------------------|---------------------------------------
    Freebsd	|        no            | uses vfork
    Solaris 11	|        no            |
    MacOSX 11   |        no            | implemented through posix_spawn syscall
    ------------|----------------------|----------------------------------------

    Similar to posix_spawn and system, the idea of popen is to spawn a
    different binary, so the POSIX rationale for running the atfork handlers
    (avoiding internal process inconsistency) does not really apply, and
    running them might be unsafe in some cases.

  * BZ#22834: the described scenario, where the forked process might access
    invalid memory due to an inconsistent state in a multithreaded
    environment, should not happen because posix_spawn does not access the
    affected data structure (proc_file_chain).

Checked on x86_64-linux-gnu and i686-linux-gnu.

	[BZ #22834]
	[BZ #17490]
	* NEWS: Add new semantic for atfork with popen and system.
	* libio/iopopen.c (_IO_new_proc_open): use posix_spawn instead of
	fork and execl.
---
 ChangeLog       |   8 ++++
 NEWS            |   6 +++
 libio/iopopen.c | 124 ++++++++++++++++++++++++++++++------------------
 3 files changed, 91 insertions(+), 47 deletions(-)

diff --git a/NEWS b/NEWS
index 1098be1afb..8483dcf492 100644
--- a/NEWS
+++ b/NEWS
@@ -35,6 +35,12 @@ Major new features:
   different directory.  This is a GNU extension and similar to the
   Solaris function of the same name.
 
+* The popen and system do not run atfork handlers anymore (BZ#17490).
+  Although it is a possible POSIX violation, the POSIX rationale in
+  pthread_atfork documentation regarding atfork handlers is to handle
+  inconsistent mutex state after a fork call in a multithreaded environment.
+  In both popen and system there is no direct access to user-defined mutexes.
+
 Deprecated and removed features, and other changes affecting compatibility:
 
 * The glibc.tune tunable namespace has been renamed to glibc.cpu and the
diff --git a/libio/iopopen.c b/libio/iopopen.c
index 2eff45b4c8..d181baa5ff 100644
--- a/libio/iopopen.c
+++ b/libio/iopopen.c
@@ -34,7 +34,8 @@
 #include <not-cancel.h>
 #include <sys/types.h>
 #include <sys/wait.h>
-#include <kernel-features.h>
+#include <spawn.h>
+#include <paths.h>
 
 struct _IO_proc_file
 {
@@ -59,13 +60,52 @@ unlock (void *not_used)
 }
 #endif
 
+static bool
+spawn_proc (posix_spawn_file_actions_t *fa, FILE *fp, const char *command,
+	    int do_cloexec, int pipe_fds[2], int parent_end, int child_end,
+	    int child_pipe_fd)
+{
+  for (struct _IO_proc_file *p = proc_file_chain; p; p = p->next)
+    {
+      int fd = _IO_fileno ((FILE *) p);
+
+      /* If any stream from previous popen() calls has fileno
+	 child_pipe_fd, it has been already closed by the adddup2 action
+	 above.  */
+      if (fd != child_pipe_fd
+	  && __posix_spawn_file_actions_addclose (fa, fd) != 0)
+	return false;
+    }
+
+  if (__posix_spawn (&((_IO_proc_file *) fp)->pid, _PATH_BSHELL, fa, 0,
+		     (char *const[]){ (char*) "sh", (char*) "-c",
+		     (char *) command, NULL }, __environ) != 0)
+    return false;
+
+  __close_nocancel (pipe_fds[child_end]);
+
+  if (!do_cloexec)
+    /* Undo the effects of the pipe2 call which set the
+       close-on-exec flag.  */
+    __fcntl (pipe_fds[parent_end], F_SETFD, 0);
+
+  _IO_fileno (fp) = pipe_fds[parent_end];
+
+  ((_IO_proc_file *) fp)->next = proc_file_chain;
+  proc_file_chain = (_IO_proc_file *) fp;
+
+  return true;
+}
+
 FILE *
 _IO_new_proc_open (FILE *fp, const char *command, const char *mode)
 {
   int read_or_write;
+  /* These are indexes for pipe_fds.  */
   int parent_end, child_end;
   int pipe_fds[2];
-  pid_t child_pid;
+  int child_pipe_fd;
+  bool spawn_ok;
 
   int do_read = 0;
   int do_write = 0;
@@ -108,72 +148,62 @@ _IO_new_proc_open (FILE *fp, const char *command, const char *mode)
 
   if (do_read)
     {
-      parent_end = pipe_fds[0];
-      child_end = pipe_fds[1];
+      parent_end = 0;
+      child_end = 1;
       read_or_write = _IO_NO_WRITES;
+      child_pipe_fd = 1;
     }
   else
     {
-      parent_end = pipe_fds[1];
-      child_end = pipe_fds[0];
+      parent_end = 1;
+      child_end = 0;
       read_or_write = _IO_NO_READS;
+      child_pipe_fd = 0;
     }
 
-  ((_IO_proc_file *) fp)->pid = child_pid = __fork ();
-  if (child_pid == 0)
-    {
-      int child_std_end = do_read ? 1 : 0;
-      struct _IO_proc_file *p;
-
-      if (child_end != child_std_end)
-	__dup2 (child_end, child_std_end);
-      else
-	/* The descriptor is already the one we will use.  But it must
-	   not be marked close-on-exec.  Undo the effects.  */
-	__fcntl (child_end, F_SETFD, 0);
-      /* POSIX.2:  "popen() shall ensure that any streams from previous
-         popen() calls that remain open in the parent process are closed
-	 in the new child process." */
-      for (p = proc_file_chain; p; p = p->next)
-	{
-	  int fd = _IO_fileno ((FILE *) p);
+  posix_spawn_file_actions_t fa;
+  /* posix_spawn_file_actions_init does not fail.  */
+  __posix_spawn_file_actions_init (&fa);
 
-	  /* If any stream from previous popen() calls has fileno
-	     child_std_end, it has been already closed by the dup2 syscall
-	     above.  */
-	  if (fd != child_std_end)
-	    __close_nocancel (fd);
-	}
-
-      execl ("/bin/sh", "sh", "-c", command, (char *) 0);
-      _exit (127);
-    }
-  __close_nocancel (child_end);
-  if (child_pid < 0)
+  /* The descriptor is already the one the child will use.  In this case
+     it must be moved to another one otherwise, there is no safe way to
+     remove the close-on-exec flag in the child without creating a FD leak
+     race in the parent.  */
+  if (pipe_fds[child_end] == child_pipe_fd)
     {
-      __close_nocancel (parent_end);
-      return NULL;
+      int tmp = __fcntl (child_pipe_fd, F_DUPFD_CLOEXEC, 0);
+      if (tmp < 0)
+	goto spawn_failure;
+      __close_nocancel (pipe_fds[child_end]);
+      pipe_fds[child_end] = tmp;
     }
 
-  if (!do_cloexec)
-    /* Undo the effects of the pipe2 call which set the
-       close-on-exec flag.  */
-    __fcntl (parent_end, F_SETFD, 0);
+  if (__posix_spawn_file_actions_adddup2 (&fa, pipe_fds[child_end],
+      child_pipe_fd) != 0)
+    goto spawn_failure;
 
-  _IO_fileno (fp) = parent_end;
-
-  /* Link into proc_file_chain. */
 #ifdef _IO_MTSAFE_IO
   _IO_cleanup_region_start_noarg (unlock);
   _IO_lock_lock (proc_file_chain_lock);
 #endif
-  ((_IO_proc_file *) fp)->next = proc_file_chain;
-  proc_file_chain = (_IO_proc_file *) fp;
+  spawn_ok = spawn_proc (&fa, fp, command, do_cloexec, pipe_fds,
+			 parent_end, child_end, child_pipe_fd);
 #ifdef _IO_MTSAFE_IO
   _IO_lock_unlock (proc_file_chain_lock);
   _IO_cleanup_region_end (0);
 #endif
 
+  __posix_spawn_file_actions_destroy (&fa);
+
+  if (!spawn_ok)
+    {
+    spawn_failure:
+      __close_nocancel (pipe_fds[child_end]);
+      __close_nocancel (pipe_fds[parent_end]);
+      __set_errno (ENOMEM);
+      return NULL;
+    }
+
   _IO_mask_flags (fp, read_or_write, _IO_NO_READS|_IO_NO_WRITES);
   return fp;
 }
-- 
2.17.1
Florian Weimer Nov. 29, 2018, 4:41 p.m. | #6
* Adhemerval Zanella:

>>> The issue is adddup2 action will dup2 the pipe child end to child_pipe_fd

>>> (in this case '1' since it has closed stdout), so adding another action to

>>> close is wrong.

>>>

>>> [1] https://bugzilla.redhat.com/show_bug.cgi?id=248281

>> 

>> Okay, thanks for providing the reference.

>> 

>> I think the fix is incorrect in the sense that POSIX is wrong here.  As

>> specified, popen can launch the new subprocess with a closed standard

>> input/output/error if it came from popen in the parent process, which is

>> wrong.  I believe the actual check should be against fd <= 2, not fd !=

>> child_std_end.  But this is a separate discussion, not related to this

>> patch.

>

> Do you mean our interpretation of POSIX or a POSIX defect?


It's a defect.  POSIX should never mandate to close the standard I/O
streams.

>> But looking at this, I realized that we have a race condition due to the

>> use of proc_file_chain_lock: A concurrent call to popen may add a

>> descriptor to the chain after the list traversal and before the

>> posix_spawn call.  Or the descriptor might now be something else because

>> pclose was called concurrently.  The old code did not have this

>> particular problem because after the fork, the set of open descriptors

>> was stable.

>> 

>> Do you think it would be acceptable to extend the scope of the critical

>> section to include the posix_spawn call?  This obviously reduces

>> concurrency somewhat.  I don't think there is a lock ordering issue,

>> though.


> You are right, the concurrent traversal and update of proc_file_chain

> leads to a race condition. However, I also think it would be a gain to

> extend the scope of critical section to include posix_spawn. At least for

> Linux posix_spawn is noticeably faster and more scalable than fork plus execve,

> especially with large resident memory sets.


The only blocking step (on things like network file systems) is the
execve in the child process because we do not open any new files.  Are
we okay with that?

> +static bool

> +spawn_proc (posix_spawn_file_actions_t *fa, FILE *fp, const char *command,

> +	    int do_cloexec, int pipe_fds[2], int parent_end, int child_end,

> +	    int child_pipe_fd)

> +{

> +  for (struct _IO_proc_file *p = proc_file_chain; p; p = p->next)

> +    {

> +      int fd = _IO_fileno ((FILE *) p);

> +

> +      /* If any stream from previous popen() calls has fileno

> +	 child_pipe_fd, it has been already closed by the adddup2 action

> +	 above.  */

> +      if (fd != child_pipe_fd

> +	  && __posix_spawn_file_actions_addclose (fa, fd) != 0)

> +	return false;

> +    }

> +

> +  if (__posix_spawn (&((_IO_proc_file *) fp)->pid, _PATH_BSHELL, fa, 0,

> +		     (char *const[]){ (char*) "sh", (char*) "-c",

> +		     (char *) command, NULL }, __environ) != 0)

> +    return false;


Please add a comment somewhere that the caller has to acquire the chain
lock, and why that lock needs to cover the posix_spawn call.

(spawn_proc should perhaps be spawn_process, to avoid the
process/procedure confusion.)

Otherwise, it looks ready to commit to me.

Thanks,
Florian
Adhemerval Zanella Nov. 29, 2018, 8:23 p.m. | #7
On 29/11/2018 14:41, Florian Weimer wrote:
> * Adhemerval Zanella:

> 

>>>> The issue is adddup2 action will dup2 the pipe child end to child_pipe_fd

>>>> (in this case '1' since it has closed stdout), so adding another action to

>>>> close is wrong.

>>>>

>>>> [1] https://bugzilla.redhat.com/show_bug.cgi?id=248281

>>>

>>> Okay, thanks for providing the reference.

>>>

>>> I think the fix is incorrect in the sense that POSIX is wrong here.  As

>>> specified, popen can launch the new subprocess with a closed standard

>>> input/output/error if it came from popen in the parent process, which is

>>> wrong.  I believe the actual check should be against fd <= 2, not fd !=

>>> child_std_end.  But this is a separate discussion, not related to this

>>> patch.

>>

>> Do you mean our interpretation of POSIX or a POSIX defect?

> 

> It's a defect.  POSIX should never mandate to close the standard I/O

> streams.


The rationale I can think of is to avoid leaking file descriptors to new
popen processes, since close-on-exec is not specified in the standard.

> 

>>> But looking at this, I realized that we have a race condition due to the

>>> use of proc_file_chain_lock: A concurrent call to popen may add a

>>> descriptor to the chain after the list traversal and before the

>>> posix_spawn call.  Or the descriptor might now be something else because

>>> pclose was called concurrently.  The old code did not have this

>>> particular problem because after the fork, the set of open descriptors

>>> was stable.

>>>

>>> Do you think it would be acceptable to extend the scope of the critical

>>> section to include the posix_spawn call?  This obviously reduces

>>> concurrency somewhat.  I don't think there is a lock ordering issue,

>>> though.

> 

>> You are right, the concurrent traversal and update of proc_file_chain

>> leads to a race condition. However, I also think it would be a gain to

>> extend the scope of critical section to include posix_spawn. At least for

>> Linux posix_spawn is noticeably faster and more scalable than fork plus execve,

>> especially with large resident memory sets.

> 

> The only blocking step (on things like network file systems) is the

> execve in the child process because we do not open any new files.  Are

> we okay with that?


IMHO, current posix_spawn should be a gain over fork (also considering vfork
not being an option), even if we add the blocking step over the execve.

> 

>> +static bool

>> +spawn_proc (posix_spawn_file_actions_t *fa, FILE *fp, const char *command,

>> +	    int do_cloexec, int pipe_fds[2], int parent_end, int child_end,

>> +	    int child_pipe_fd)

>> +{

>> +  for (struct _IO_proc_file *p = proc_file_chain; p; p = p->next)

>> +    {

>> +      int fd = _IO_fileno ((FILE *) p);

>> +

>> +      /* If any stream from previous popen() calls has fileno

>> +	 child_pipe_fd, it has been already closed by the adddup2 action

>> +	 above.  */

>> +      if (fd != child_pipe_fd

>> +	  && __posix_spawn_file_actions_addclose (fa, fd) != 0)

>> +	return false;

>> +    }

>> +

>> +  if (__posix_spawn (&((_IO_proc_file *) fp)->pid, _PATH_BSHELL, fa, 0,

>> +		     (char *const[]){ (char*) "sh", (char*) "-c",

>> +		     (char *) command, NULL }, __environ) != 0)

>> +    return false;

> 

> Please add a comment somewhere that the caller has to acquire the chain

> lock, and why that lock needs to cover the posix_spawn call.

> 

> (spawn_proc should perhaps be spawn_process, to avoid the

> process/procedure confusion.)


Ack, I changed it to spawn_process and added the comment:

/* POSIX states popen shall ensure that any streams from previous popen()
   calls that remain open in the parent process should be closed in the new
   child process.
   To avoid a race condition between checking which file descriptors need to
   be closed (by traversing the proc_file_chain list) and the insertion of a
   new one after a successful posix_spawn, this function should be called
   with proc_file_chain_lock acquired.  */


> 

> Otherwise, it looks ready to commit to me.

> 

> Thanks,

> Florian

>

Patch

diff --git a/NEWS b/NEWS
index f054dc0433..c76813d12c 100644
--- a/NEWS
+++ b/NEWS
@@ -30,6 +30,12 @@  Major new features:
   HTM state is saved and restore lazily (the state being saved even when the
   process actually does not use HTM).
 
+* The popen and system do not run atfork handlers anymore (BZ#17490).
+  Although it is a possible POSIX violation, the POSIX rationale in
+  pthread_atfork documentation regarding atfork handlers is to handle
+  inconsistent mutex state after fork call in multithread environment.
+  In both popen and system there is no direct access to user-defined mutexes.
+
 Deprecated and removed features, and other changes affecting compatibility:
 
 * The glibc.tune tunable namespace has been renamed to glibc.cpu and the
diff --git a/libio/iopopen.c b/libio/iopopen.c
index 2eff45b4c8..da24e60eef 100644
--- a/libio/iopopen.c
+++ b/libio/iopopen.c
@@ -34,7 +34,8 @@ 
 #include <not-cancel.h>
 #include <sys/types.h>
 #include <sys/wait.h>
-#include <kernel-features.h>
+#include <spawn.h>
+#include <paths.h>
 
 struct _IO_proc_file
 {
@@ -64,8 +65,8 @@  _IO_new_proc_open (FILE *fp, const char *command, const char *mode)
 {
   int read_or_write;
   int parent_end, child_end;
+  int child_std_end;
   int pipe_fds[2];
-  pid_t child_pid;
 
   int do_read = 0;
   int do_write = 0;
@@ -108,59 +109,92 @@  _IO_new_proc_open (FILE *fp, const char *command, const char *mode)
 
   if (do_read)
     {
-      parent_end = pipe_fds[0];
-      child_end = pipe_fds[1];
+      parent_end = 0;
+      child_end = 1;
       read_or_write = _IO_NO_WRITES;
+      child_std_end = 1;
     }
   else
     {
-      parent_end = pipe_fds[1];
-      child_end = pipe_fds[0];
+      parent_end = 1;
+      child_end = 0;
       read_or_write = _IO_NO_READS;
+      child_std_end = 0;
     }
 
-  ((_IO_proc_file *) fp)->pid = child_pid = __fork ();
-  if (child_pid == 0)
-    {
-      int child_std_end = do_read ? 1 : 0;
-      struct _IO_proc_file *p;
-
-      if (child_end != child_std_end)
-	__dup2 (child_end, child_std_end);
-      else
-	/* The descriptor is already the one we will use.  But it must
-	   not be marked close-on-exec.  Undo the effects.  */
-	__fcntl (child_end, F_SETFD, 0);
-      /* POSIX.2:  "popen() shall ensure that any streams from previous
-         popen() calls that remain open in the parent process are closed
-	 in the new child process." */
-      for (p = proc_file_chain; p; p = p->next)
-	{
-	  int fd = _IO_fileno ((FILE *) p);
+  {
+    posix_spawn_file_actions_t fa;
+    /* posix_spawn_file_actions_init does not fail.  */
+    __posix_spawn_file_actions_init (&fa);
 
-	  /* If any stream from previous popen() calls has fileno
-	     child_std_end, it has been already closed by the dup2 syscall
-	     above.  */
-	  if (fd != child_std_end)
-	    __close_nocancel (fd);
-	}
+    /* The descriptor is already the one the child will use.  In this case
+       it must be moved to another one otherwise, there is no safe way to
+       remove the close-on-exec flag in the child without creating a FD leak
+       race in the parent.  */
+    if (pipe_fds[child_end] == child_std_end)
+      {
+	int tmp = __fcntl (child_std_end, F_DUPFD_CLOEXEC, 0);
+	if (tmp < 0)
+	  goto spawn_failure;
+	__close_nocancel (pipe_fds[child_end]);
+	pipe_fds[child_end] = tmp;
+      }
 
-      execl ("/bin/sh", "sh", "-c", command, (char *) 0);
-      _exit (127);
-    }
-  __close_nocancel (child_end);
-  if (child_pid < 0)
-    {
-      __close_nocancel (parent_end);
-      return NULL;
-    }
+    if (__posix_spawn_file_actions_adddup2 (&fa, pipe_fds[child_end],
+	child_std_end) != 0)
+      goto spawn_failure;
+
+    /* POSIX.2: "popen() shall ensure that any streams from previous popen()
+       calls that remain open in the parent process are closed in the new
+       child process." */
+    bool addclose_failure = false;
+#ifdef _IO_MTSAFE_IO
+    _IO_cleanup_region_start_noarg (unlock);
+    _IO_lock_lock (proc_file_chain_lock);
+#endif
+    for (struct _IO_proc_file *p = proc_file_chain; p; p = p->next)
+      {
+	int fd = _IO_fileno ((FILE *) p);
+
+	/* If any stream from previous popen() calls has fileno
+	   child_std_end, it has been already closed by the dup2 syscall
+	   above.  */
+	if (fd != child_std_end
+	    && __posix_spawn_file_actions_addclose (&fa, fd) != 0)
+	  {
+	    addclose_failure = true;
+	    break;
+	  }
+      }
+#ifdef _IO_MTSAFE_IO
+     _IO_lock_unlock (proc_file_chain_lock);
+     _IO_cleanup_region_end (0);
+#endif
+    if (addclose_failure)
+      goto spawn_failure;
+
+    if (__posix_spawn (&((_IO_proc_file *) fp)->pid, _PATH_BSHELL, &fa, 0,
+		     (char *const[]){ (char*) "sh", (char*) "-c",
+		     (char *) command, NULL }, __environ) != 0)
+      {
+      spawn_failure:
+	__posix_spawn_file_actions_destroy (&fa);
+	__close_nocancel (pipe_fds[child_end]);
+	__close_nocancel (pipe_fds[parent_end]);
+	__set_errno (ENOMEM);
+	return NULL;
+      }
+
+    __posix_spawn_file_actions_destroy (&fa);
+  }
+  __close_nocancel (pipe_fds[child_end]);
 
   if (!do_cloexec)
     /* Undo the effects of the pipe2 call which set the
        close-on-exec flag.  */
-    __fcntl (parent_end, F_SETFD, 0);
+    __fcntl (pipe_fds[parent_end], F_SETFD, 0);
 
-  _IO_fileno (fp) = parent_end;
+  _IO_fileno (fp) = pipe_fds[parent_end];
 
   /* Link into proc_file_chain. */
 #ifdef _IO_MTSAFE_IO