diff mbox series

[v5,2/5] pidfd: add PIDFD_SELF_* sentinels to refer to own thread/process

Message ID 788fdfcc9ef602b408951d68097918d6ae379395.1729848252.git.lorenzo.stoakes@oracle.com
State Superseded
Headers show
Series introduce PIDFD_SELF* sentinels | expand

Commit Message

Lorenzo Stoakes Oct. 25, 2024, 9:41 a.m. UTC
It is useful to be able to utilise the pidfd mechanism to reference the
current thread or process (from a userland point of view - thread group
leader from the kernel's point of view).

Therefore introduce PIDFD_SELF_THREAD to refer to the current thread, and
PIDFD_SELF_THREAD_GROUP to refer to the current thread group leader.

For convenience and to avoid confusion from userland's perspective we alias
these:

* PIDFD_SELF is an alias for PIDFD_SELF_THREAD - This is nearly always what
  the user will want to use, as they would find it surprising if for
  instance fd's were unshared()'d and they wanted to invoke pidfd_getfd()
  and that failed.

* PIDFD_SELF_PROCESS is an alias for PIDFD_SELF_THREAD_GROUP - Most users
  have no concept of thread groups or what a thread group leader is, and
  from userland's perspective and nomenclature this is what userland
  considers to be a process.

Due to the refactoring of the central __pidfd_get_pid() function we can
implement this functionality centrally, providing the use of this sentinel
in most functionality which utilises pidfd's.

We need to explicitly adjust kernel_waitid_prepare() to permit this (though
it wouldn't really make sense to use this there, we provide the ability for
consistency).

We explicitly disallow use of this in setns(), which would otherwise have
required explicit custom handling, as it doesn't make sense to set the
current calling thread to join the namespace of itself.

As the callers of pidfd_get_pid() expect an increased reference count on
the pid we do so in the self case, reducing churn and avoiding any breakage
from existing logic which decrements this reference count.

This change implicitly provides PIDFD_SELF_* support in the waitid(P_PIDFS,
...), process_madvise(), process_mrelease(), pidfd_send_signal(), and
pidfd_getfd() system calls.

Things such as polling a pidfs and general fd operations are not supported,
this strictly provides the sentinel for APIs which explicitly accept a
pidfd.

Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 include/linux/pid.h        |  8 ++++--
 include/uapi/linux/pidfd.h | 15 +++++++++++
 kernel/exit.c              |  3 ++-
 kernel/nsproxy.c           |  1 +
 kernel/pid.c               | 51 ++++++++++++++++++++++++--------------
 5 files changed, 57 insertions(+), 21 deletions(-)

Comments

Lorenzo Stoakes Oct. 25, 2024, 1:08 p.m. UTC | #1
On Fri, Oct 25, 2024 at 01:50:12PM +0100, Pedro Falcato wrote:
> On Fri, Oct 25, 2024 at 10:41 AM Lorenzo Stoakes
> <lorenzo.stoakes@oracle.com> wrote:
> >
> > It is useful to be able to utilise the pidfd mechanism to reference the
> > current thread or process (from a userland point of view - thread group
> > leader from the kernel's point of view).
> >
> > Therefore introduce PIDFD_SELF_THREAD to refer to the current thread, and
> > PIDFD_SELF_THREAD_GROUP to refer to the current thread group leader.
> >
> > For convenience and to avoid confusion from userland's perspective we alias
> > these:
> >
> > * PIDFD_SELF is an alias for PIDFD_SELF_THREAD - This is nearly always what
> >   the user will want to use, as they would find it surprising if for
> >   instance fd's were unshared()'d and they wanted to invoke pidfd_getfd()
> >   and that failed.
> >
> > * PIDFD_SELF_PROCESS is an alias for PIDFD_SELF_THREAD_GROUP - Most users
> >   have no concept of thread groups or what a thread group leader is, and
> >   from userland's perspective and nomenclature this is what userland
> >   considers to be a process.
> >
> > Due to the refactoring of the central __pidfd_get_pid() function we can
> > implement this functionality centrally, providing the use of this sentinel
> > in most functionality which utilises pidfd's.
> >
> > We need to explicitly adjust kernel_waitid_prepare() to permit this (though
> > it wouldn't really make sense to use this there, we provide the ability for
> > consistency).
> >
> > We explicitly disallow use of this in setns(), which would otherwise have
> > required explicit custom handling, as it doesn't make sense to set the
> > current calling thread to join the namespace of itself.
> >
> > As the callers of pidfd_get_pid() expect an increased reference count on
> > the pid we do so in the self case, reducing churn and avoiding any breakage
> > from existing logic which decrements this reference count.
> >
> > This change implicitly provides PIDFD_SELF_* support in the waitid(P_PIDFS,
> > ...), process_madvise(), process_mrelease(), pidfd_send_signal(), and
> > pidfd_getfd() system calls.
> >
> > Things such as polling a pidfs and general fd operations are not supported,
> > this strictly provides the sentinel for APIs which explicitly accept a
> > pidfd.
> >
> > Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > ---
> >  include/linux/pid.h        |  8 ++++--
> >  include/uapi/linux/pidfd.h | 15 +++++++++++
> >  kernel/exit.c              |  3 ++-
> >  kernel/nsproxy.c           |  1 +
> >  kernel/pid.c               | 51 ++++++++++++++++++++++++--------------
> >  5 files changed, 57 insertions(+), 21 deletions(-)
> >
> > diff --git a/include/linux/pid.h b/include/linux/pid.h
> > index d466890e1b35..3b2ac7567a88 100644
> > --- a/include/linux/pid.h
> > +++ b/include/linux/pid.h
> > @@ -78,11 +78,15 @@ struct file;
> >   * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd.
> >   *
> >   * @pidfd:      The pidfd whose pid we want, or the fd of a /proc/<pid> file if
> > - *              @alloc_proc is also set.
> > + *              @alloc_proc is also set, or PIDFD_SELF_* to refer to the current
> > + *              thread or thread group leader.
> >   * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead
> >   *              of a pidfd, and this will be used to determine the pid.
> > +
> >   * @flags:      Output variable, if non-NULL, then the file->f_flags of the
> > - *              pidfd will be set here.
> > + *              pidfd will be set here or If PIDFD_SELF_THREAD is set, this is
> > + *              set to PIDFD_THREAD, otherwise if PIDFD_SELF_THREAD_GROUP then
> > + *              this is set to zero.
> >   *
> >   * Returns: If successful, the pid associated with the pidfd, otherwise an
> >   *          error.
> > diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
> > index 565fc0629fff..0ca2ebf906fd 100644
> > --- a/include/uapi/linux/pidfd.h
> > +++ b/include/uapi/linux/pidfd.h
> > @@ -29,4 +29,19 @@
> >  #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
> >  #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
> >
> > +/*
> > + * Special sentinel values which can be used to refer to the current thread or
> > + * thread group leader (which from a userland perspective is the process).
> > + */
> > +#define PIDFD_SELF             PIDFD_SELF_THREAD
> > +#define PIDFD_SELF_PROCESS     PIDFD_SELF_THREAD_GROUP
> > +
> > +#define PIDFD_SELF_THREAD      -100 /* Current thread. */
>
> This conflicts with AT_FDCWD, might be worth changing?
>
> > +#define PIDFD_SELF_THREAD_GROUP        -200 /* Current thread group leader. */
>
> We might want to pick some range outside of the negative errno space
> (-4096 IIRC), since we have plenty of values to pick from (2^31 at
> least).

This is entirely up to Christian, I used the values he suggested in
review. But I agree we should probably find one that doesn't conflict and
is outside that range.

>
> > +static inline int pidfd_is_self_sentinel(pid_t pid)
> > +{
> > +       return pid == PIDFD_SELF_THREAD || pid == PIDFD_SELF_THREAD_GROUP;
> > +}
>
> Do we want this in the uapi header? Even if this is useful, it might
> come with several drawbacks such as breaking scripts that parse kernel
> headers (and a quick git grep suggests we do have static inlines in
> headers, but in rather obscure ones) and breaking C89:
>
> <source>:8:8: error: unknown type name 'inline'
>     8 | static inline int pidfd_is_self_sentinel(pid_t pid)
>
> :)

It doesn't really make sense to put it anywhere else I don't think.

I'm not sure 'support compilers that don't know what inline is' is a
requirement for UAPI. Nor do I suspect people using such strict ansi-c89
compilers will be importing linux/pidfd.h... :)

Also:

[~/kerndev/kernels/mm/include/uapi/linux]$ ag inline | wc -l
382

I mean yeah 'obscure' or not it seems this is an acceptable thing to do :)

>
> > +
> >  #endif /* _UAPI_LINUX_PIDFD_H */
> > diff --git a/kernel/exit.c b/kernel/exit.c
> > index 619f0014c33b..3eb20f8252ee 100644
> > --- a/kernel/exit.c
> > +++ b/kernel/exit.c
> > @@ -71,6 +71,7 @@
> >  #include <linux/user_events.h>
> >  #include <linux/uaccess.h>
> >
> > +#include <uapi/linux/pidfd.h>
> >  #include <uapi/linux/wait.h>
> >
> >  #include <asm/unistd.h>
> > @@ -1739,7 +1740,7 @@ int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
> >                 break;
> >         case P_PIDFD:
> >                 type = PIDTYPE_PID;
> > -               if (upid < 0)
> > +               if (upid < 0 && !pidfd_is_self_sentinel(upid))
> >                         return -EINVAL;
> >
> >                 pid = pidfd_get_pid(upid, &f_flags);
> > diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> > index dc952c3b05af..d239f7eeaa1f 100644
> > --- a/kernel/nsproxy.c
> > +++ b/kernel/nsproxy.c
> > @@ -550,6 +550,7 @@ SYSCALL_DEFINE2(setns, int, fd, int, flags)
> >         struct nsset nsset = {};
> >         int err = 0;
> >
> > +       /* If fd is PIDFD_SELF_*, implicitly fail here, as invalid. */
> >         if (!fd_file(f))
> >                 return -EBADF;
> >
> > diff --git a/kernel/pid.c b/kernel/pid.c
> > index 94c97559e5c5..8742157b36f8 100644
> > --- a/kernel/pid.c
> > +++ b/kernel/pid.c
> > @@ -535,33 +535,48 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
> >  }
> >  EXPORT_SYMBOL_GPL(find_ge_pid);
> >
> > +static struct pid *pidfd_get_pid_self(unsigned int pidfd, unsigned int *flags)
> > +{
> > +       bool is_thread = pidfd == PIDFD_SELF_THREAD;
> > +       enum pid_type type = is_thread ? PIDTYPE_PID : PIDTYPE_TGID;
> > +       struct pid *pid = *task_pid_ptr(current, type);
> > +
> > +       /* The caller expects an elevated reference count. */
> > +       get_pid(pid);
>
> It would be really really nice to avoid the get here, but I imagine
> it'll take some refactoring around put_pid's?

I cover this in the commit message and have addressed it on review already,
but to risk repeating myself :)

Yes it'd be nice, but then you would have to make sure you _always_
unpinned correctly _everywhere_ from here on in, and it makes the behaviour
different for these self modes.

You'd need to change how everyone everywhere puts and... yeah. It's not a
big deal to do a useless ref inc here I don't think, eliminates a class of
bug, and importantly it keeps behaviour identical to if you do a self-pidfd
in the 'manual' way.

I equally dislike this aspect, but doing it this way also enables us to
implement this in this one place and get self pidfd support 'for free'
everywhere.

So I think RoI-wise this is a better proposition than the alternative.

>
> > +       return pid;
> > +}
> > +
> >  struct pid *__pidfd_get_pid(unsigned int pidfd, bool allow_proc,
> >                             unsigned int *flags)
> >  {
> > -       struct pid *pid;
> > -       struct fd f = fdget(pidfd);
> > -       struct file *file = fd_file(f);
> > +       if (pidfd_is_self_sentinel(pidfd)) {
> > +               return pidfd_get_pid_self(pidfd, flags);
> > +       } else {
>
> Skipping the else here might make the rest of the code more legible
> (since the sentinel branch returns anyway...).

This is so we can declare types for the other branch without having to
figure out how to assign the struct fd sensibly.

Normally I'm a big fan of the if (!...) { return ... } guard pattern, but
it's because of the 'types first' requirement of kernel code that I do this
here.

>
> --
> Pedro
John Hubbard Oct. 25, 2024, 6:44 p.m. UTC | #2
On 10/25/24 11:38 AM, Pedro Falcato wrote:
> On Fri, Oct 25, 2024 at 6:41 PM John Hubbard <jhubbard@nvidia.com> wrote:
>>
>> On 10/25/24 5:50 AM, Pedro Falcato wrote:
>>> On Fri, Oct 25, 2024 at 10:41 AM Lorenzo Stoakes
>>> <lorenzo.stoakes@oracle.com> wrote:
>> ...
>>>> +static inline int pidfd_is_self_sentinel(pid_t pid)
>>>> +{
>>>> +       return pid == PIDFD_SELF_THREAD || pid == PIDFD_SELF_THREAD_GROUP;
>>>> +}
>>>
>>> Do we want this in the uapi header? Even if this is useful, it might
>>> come with several drawbacks such as breaking scripts that parse kernel
>>> headers (and a quick git grep suggests we do have static inlines in
>>> headers, but in rather obscure ones) and breaking C89:
>>>
>>
>> Let's please not say "C89" anymore, we've moved on! :)
>>
>> The notes in [1], which is now nearly 2.5 years old, discuss the move to
>> C11, and specifically how to handle the inline keyword.
> 
> That seems to only apply to the kernel internally, uapi headers are

Yes.

> included from userspace too (-std=c89 -pedantic doesn't know what a
> gnu extension is). And uapi headers _generally_ keep to defining
> constants and structs, nothing more.

OK

> I don't know what the guidelines for uapi headers are nowadays, but we
> generally want to not break userspace.
> 
>>
>> I think it's quite clear at this point, that we should not hold up new
>> work, based on concerns about handling the inline keyword, nor about
>> C89.
> 
> Right, but the correct solution is probably to move
> pidfd_is_self_sentinel to some other place, since it's not even
> supposed to be used by userspace (it's semantically useless to
> userspace, and it's only two users are in the kernel, kernel/pid.c and
> exit.c).
> 

Yes, if userspace absolutely doesn't need nor want this, then putting
it in a non-uapi header does sound like the right move.


thanks,
Lorenzo Stoakes Oct. 25, 2024, 7:49 p.m. UTC | #3
On Fri, Oct 25, 2024 at 11:44:34AM -0700, John Hubbard wrote:
> On 10/25/24 11:38 AM, Pedro Falcato wrote:
> > On Fri, Oct 25, 2024 at 6:41 PM John Hubbard <jhubbard@nvidia.com> wrote:
> > >
> > > On 10/25/24 5:50 AM, Pedro Falcato wrote:
> > > > On Fri, Oct 25, 2024 at 10:41 AM Lorenzo Stoakes
> > > > <lorenzo.stoakes@oracle.com> wrote:
> > > ...
> > > > > +static inline int pidfd_is_self_sentinel(pid_t pid)
> > > > > +{
> > > > > +       return pid == PIDFD_SELF_THREAD || pid == PIDFD_SELF_THREAD_GROUP;
> > > > > +}
> > > >
> > > > Do we want this in the uapi header? Even if this is useful, it might
> > > > come with several drawbacks such as breaking scripts that parse kernel
> > > > headers (and a quick git grep suggests we do have static inlines in
> > > > headers, but in rather obscure ones) and breaking C89:
> > > >
> > >
> > > Let's please not say "C89" anymore, we've moved on! :)
> > >
> > > The notes in [1], which is now nearly 2.5 years old, discuss the move to
> > > C11, and specifically how to handle the inline keyword.
> >
> > That seems to only apply to the kernel internally, uapi headers are
>
> Yes.
>
> > included from userspace too (-std=c89 -pedantic doesn't know what a
> > gnu extension is). And uapi headers _generally_ keep to defining
> > constants and structs, nothing more.
>
> OK

Because a lot of people using -ANSI- C89 are importing a very new linux
feature header.

And let's ignore the hundreds of existing uses... OK.

The rules, unstated anywhere, are that we must support 1972-era C in an
optional header for a feature available only in new kernels because
somebody somewhere is using a VAX-11 and gosh darn it they can't change
their toolchain!

And you had better make sure you don't wear out those tape drums...

>
> > I don't know what the guidelines for uapi headers are nowadays, but we
> > generally want to not break userspace.
> >
> > >
> > > I think it's quite clear at this point, that we should not hold up new
> > > work, based on concerns about handling the inline keyword, nor about
> > > C89.
> >
> > Right, but the correct solution is probably to move
> > pidfd_is_self_sentinel to some other place, since it's not even
> > supposed to be used by userspace (it's semantically useless to
> > userspace, and it's only two users are in the kernel, kernel/pid.c and
> > exit.c).
> >
>
> Yes, if userspace absolutely doesn't need nor want this, then putting
> it in a non-uapi header does sound like the right move.

The bike shed should be blue! Wait no no, it should be red... Hang on
yellow yes! Yellow's great!

No wait - did we _test_ yellow in the way I wanted...

I mean for me this isn't a big deal - we declare the defines here, it makes
sense to have a very very simple inline function.

It's not like userspace is overly hurt by this...

Also I did explain there's no obvious header to put this in in the kernel
and I'm not introducing one sorry.

ANyway if you guys feel strong enough about this, I'll respin again and
just open-code this trivial check where it's used.

>
>
> thanks,
> --
> John Hubbard
>
John Hubbard Oct. 25, 2024, 9:51 p.m. UTC | #4
On 10/25/24 2:09 PM, Lorenzo Stoakes wrote:
> On Fri, Oct 25, 2024 at 01:31:49PM -0700, John Hubbard wrote:
>> On 10/25/24 12:49 PM, Lorenzo Stoakes wrote:
>>> On Fri, Oct 25, 2024 at 11:44:34AM -0700, John Hubbard wrote:
>>>> On 10/25/24 11:38 AM, Pedro Falcato wrote:
>>>>> On Fri, Oct 25, 2024 at 6:41 PM John Hubbard <jhubbard@nvidia.com> wrote:
>> ...
>> I'll admit to being easily cowed by "you're breaking userspace" arguments.
>> Even when they start to get rather absurd. Because I can't easily tell where
>> the line is.
>>
>> Maybe "-std=c89 -pedantic" is on the other side of the line. I'd like it
>> to be! :)
> 
> Well, apparently not...

Why not? Your arguments are clear and reasonable. Why shouldn't they prevail?

Please don't think that I have some sort of firm position here. I'm simply
looking for the right answer. And if that's different than something I
proposed earlier, no problem. The best answer should win.

...
>>> The bike shed should be blue! Wait no no, it should be red... Hang on
>>> yellow yes! Yellow's great!
>>
>> Putting a header in the right location, so as to avoid breakage here or
>> there, is not bikeshedding. Sorry.
> 
> There are 312 uses of "static inline" already in UAPI headers, not all
> quite as obscure as claimed.
> 

OK, good. Let's lead with that. It seems very clear, then, that a new one
won't cause a problem.

> Specifically requiring me and only me to support ansi C89 for a theorised
> scenario is in my opinion bikeshedding, but I don't want to get into an
> argument about something so petty :)

An argument about the definition of bikeshedding sounds delightfully
recursive, but yes, let's not. :)

...
>>> ANyway if you guys feel strong enough about this, I'll respin again and
>>> just open-code this trivial check where it's used.
>>
>> No strong feelings, just hoping to help make a choice that gets you
>> closer to getting your patches committed.
> 
> I mean, you are saying I am breaking things and implying the series is
> blocked on this, that sounds like a strong opinion, but again I'm not going
> to argue.

Actually, Pedro's request kicked this off, and I was hoping to dismiss
it--again, in order to help move things along. My opinion is that we
should shun ancient toolchains and ancient systems whenever possible.

Somehow that got turned into "I'm trying to block the patchset". Really,
whatever works, follows The Rules (whatever we eventually understand
them to be), and doesn't cause someone *else* to come out of the
woodwork and claim a problem, is fine with me.

> 
> As with the requirement that I, only for my part of the change, must fix up
> test header import, while I disagree I should be doing the fix, I did it
> anyway as I am accommodating and reasonable.

I agree that pre-existing problems in selftests should not be your
problem.

By the way, I'm occasionally involved in helping fix up various
selftest-related problems, especially when they impact mm. Send me a
note if you have anything in mind that ought to be fixed up, I might be
able to help head off future grief in that area.

> 
> So fine - I'll respin and just open-code this as it's trivial and there's
> no (other) sensible place to put it anyway.
> 
> A P.S. though - a very NOT theoretical issue with userspace is the import
> of linux/fcntl.h in pidfd.h which seems to me to have been imported solely
> for the kernel's sake.
> 
> A gentle suggestion (it seems I can't win - gentle suggestions are ignored,
> tongue-in-cheek parody is taken to be mean... but anyway) is to do

Actually, these come across as sarcasm, especially in the context of
these emails that show you are becoming quite distraught.

I've met you several times at the conferences. We get along well. And
your work is top notch. So please consider that I'm very much supportive
of you and your work here.

I'm still trying to understand why you are recently sending these very
strong emails (Vlastimil also took some heat), but I see that you also
mentioned some long hours.

If my feedback is making things worse here, I'll try to adjust.
Selftests in general are a frustrating area.


thanks,
diff mbox series

Patch

diff --git a/include/linux/pid.h b/include/linux/pid.h
index d466890e1b35..3b2ac7567a88 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -78,11 +78,15 @@  struct file;
  * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd.
  *
  * @pidfd:      The pidfd whose pid we want, or the fd of a /proc/<pid> file if
- *              @alloc_proc is also set.
+ *              @alloc_proc is also set, or PIDFD_SELF_* to refer to the current
+ *              thread or thread group leader.
  * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead
  *              of a pidfd, and this will be used to determine the pid.
+
  * @flags:      Output variable, if non-NULL, then the file->f_flags of the
- *              pidfd will be set here.
+ *              pidfd will be set here or If PIDFD_SELF_THREAD is set, this is
+ *              set to PIDFD_THREAD, otherwise if PIDFD_SELF_THREAD_GROUP then
+ *              this is set to zero.
  *
  * Returns: If successful, the pid associated with the pidfd, otherwise an
  *          error.
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 565fc0629fff..0ca2ebf906fd 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -29,4 +29,19 @@ 
 #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
 #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
 
+/*
+ * Special sentinel values which can be used to refer to the current thread or
+ * thread group leader (which from a userland perspective is the process).
+ */
+#define PIDFD_SELF		PIDFD_SELF_THREAD
+#define PIDFD_SELF_PROCESS	PIDFD_SELF_THREAD_GROUP
+
+#define PIDFD_SELF_THREAD	-100 /* Current thread. */
+#define PIDFD_SELF_THREAD_GROUP	-200 /* Current thread group leader. */
+
+static inline int pidfd_is_self_sentinel(pid_t pid)
+{
+	return pid == PIDFD_SELF_THREAD || pid == PIDFD_SELF_THREAD_GROUP;
+}
+
 #endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index 619f0014c33b..3eb20f8252ee 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -71,6 +71,7 @@ 
 #include <linux/user_events.h>
 #include <linux/uaccess.h>
 
+#include <uapi/linux/pidfd.h>
 #include <uapi/linux/wait.h>
 
 #include <asm/unistd.h>
@@ -1739,7 +1740,7 @@  int kernel_waitid_prepare(struct wait_opts *wo, int which, pid_t upid,
 		break;
 	case P_PIDFD:
 		type = PIDTYPE_PID;
-		if (upid < 0)
+		if (upid < 0 && !pidfd_is_self_sentinel(upid))
 			return -EINVAL;
 
 		pid = pidfd_get_pid(upid, &f_flags);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index dc952c3b05af..d239f7eeaa1f 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -550,6 +550,7 @@  SYSCALL_DEFINE2(setns, int, fd, int, flags)
 	struct nsset nsset = {};
 	int err = 0;
 
+	/* If fd is PIDFD_SELF_*, implicitly fail here, as invalid. */
 	if (!fd_file(f))
 		return -EBADF;
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 94c97559e5c5..8742157b36f8 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -535,33 +535,48 @@  struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 }
 EXPORT_SYMBOL_GPL(find_ge_pid);
 
+static struct pid *pidfd_get_pid_self(unsigned int pidfd, unsigned int *flags)
+{
+	bool is_thread = pidfd == PIDFD_SELF_THREAD;
+	enum pid_type type = is_thread ? PIDTYPE_PID : PIDTYPE_TGID;
+	struct pid *pid = *task_pid_ptr(current, type);
+
+	/* The caller expects an elevated reference count. */
+	get_pid(pid);
+	return pid;
+}
+
 struct pid *__pidfd_get_pid(unsigned int pidfd, bool allow_proc,
 			    unsigned int *flags)
 {
-	struct pid *pid;
-	struct fd f = fdget(pidfd);
-	struct file *file = fd_file(f);
+	if (pidfd_is_self_sentinel(pidfd)) {
+		return pidfd_get_pid_self(pidfd, flags);
+	} else {
+		struct pid *pid;
+		struct fd f = fdget(pidfd);
+		struct file *file = fd_file(f);
 
-	if (!file)
-		return ERR_PTR(-EBADF);
+		if (!file)
+			return ERR_PTR(-EBADF);
 
-	pid = pidfd_pid(file);
-	/* If we allow opening a pidfd via /proc/<pid>, do so. */
-	if (IS_ERR(pid) && allow_proc)
-		pid = tgid_pidfd_to_pid(file);
+		pid = pidfd_pid(file);
+		/* If we allow opening a pidfd via /proc/<pid>, do so. */
+		if (IS_ERR(pid) && allow_proc)
+			pid = tgid_pidfd_to_pid(file);
 
-	if (IS_ERR(pid)) {
+		if (IS_ERR(pid)) {
+			fdput(f);
+			return pid;
+		}
+
+		/* Pin pid before we release fd. */
+		get_pid(pid);
+		if (flags)
+			*flags = file->f_flags;
 		fdput(f);
+
 		return pid;
 	}
-
-	/* Pin pid before we release fd. */
-	get_pid(pid);
-	if (flags)
-		*flags = file->f_flags;
-	fdput(f);
-
-	return pid;
 }
 
 /**