[v3,3/3] syscalls/tgkill03: add new test

Message ID 1552978181-27748-4-git-send-email-sumit.garg@linaro.org
State New
Headers show
Series
  • syscalls: add tgkill test-cases
Related show

Commit Message

Sumit Garg March 19, 2019, 6:49 a.m.
From: Greg Hackmann <ghackmann@google.com>

Test simple tgkill() error cases.

Signed-off-by: Greg Hackmann <ghackmann@google.com>
Signed-off-by: Sumit Garg <sumit.garg@linaro.org>
Reviewed-by: Li Wang <liwang@redhat.com>
---
 runtest/syscalls                            |   1 +
 testcases/kernel/syscalls/tgkill/.gitignore |   1 +
 testcases/kernel/syscalls/tgkill/tgkill03.c | 110 ++++++++++++++++++++++++++++
 3 files changed, 112 insertions(+)
 create mode 100644 testcases/kernel/syscalls/tgkill/tgkill03.c

Comments

Cyril Hrubis March 19, 2019, 1:56 p.m. | #1
Hi!
I've added one more ESRCH case here:

diff --git a/testcases/kernel/syscalls/tgkill/tgkill03.c b/testcases/kernel/syscalls/tgkill/tgkill03.c
index b78e9d95a..f5bbdc5a8 100644
--- a/testcases/kernel/syscalls/tgkill/tgkill03.c
+++ b/testcases/kernel/syscalls/tgkill/tgkill03.c
@@ -76,7 +76,8 @@ static const struct testcase {
        { "Invalid tgid", &invalid_pid, &parent_tid, SIGUSR1, EINVAL },
        { "Invalid tid", &parent_tgid, &invalid_pid, SIGUSR1, EINVAL },
        { "Invalid signal", &parent_tgid, &parent_tid, -1, EINVAL },
-       { "Defunct thread ID", &parent_tgid, &defunct_tid, SIGUSR1, ESRCH },
+       { "Defunct tid", &parent_tgid, &defunct_tid, SIGUSR1, ESRCH },
+       { "Defunct tgid", &defunct_tid, &child_tid, SIGUSR1, ESRCH },
        { "Valid tgkill call", &parent_tgid, &child_tid, SIGUSR1, 0 },
 };
Jan Stancek June 15, 2019, 11:08 p.m. | #2
----- Original Message -----
> +static void setup(void)
> +{
> +	sigset_t sigusr1;
> +	pthread_t defunct_thread;
> +
> +	sigemptyset(&sigusr1);
> +	sigaddset(&sigusr1, SIGUSR1);
> +	pthread_sigmask(SIG_BLOCK, &sigusr1, NULL);
> +
> +	parent_tgid = getpid();
> +	parent_tid = sys_gettid();
> +
> +	SAFE_PTHREAD_CREATE(&child_thread, NULL, child_thread_func, NULL);
> +
> +	TST_CHECKPOINT_WAIT(0);

I'm seeing reports of this test failing on s390x:

tst_test.c:1096: INFO: Timeout per run is 0h 05m 00s
tgkill03.c:92: PASS: Invalid tgid failed as expected: EINVAL
tgkill03.c:92: PASS: Invalid tid failed as expected: EINVAL
tgkill03.c:92: PASS: Invalid signal failed as expected: EINVAL
tgkill03.c:96: FAIL: Defunct tid should have failed with ESRCH: SUCCESS
tgkill03.c:92: PASS: Defunct tgid failed as expected: ESRCH
tgkill03.c:99: PASS: Valid tgkill call succeeded

and I suspect this piece:

> +
> +	SAFE_PTHREAD_CREATE(&defunct_thread, NULL, defunct_thread_func, NULL);
> +
> +	SAFE_PTHREAD_JOIN(defunct_thread, NULL);
> +}

glibc pthread_join() waits for CLONE_CHILD_CLEARTID to clear the tid,
and then resumes. The kernel clears it at:
  do_exit
    exit_mm
      mm_release
        put_user(0, tsk->clear_child_tid);

so there's still work to be done after that, and I suspect tid is still valid
while that happens.

My first idea: wait until /proc/pid/task/<tid> disappears.

Regards,
Jan
Li Wang June 16, 2019, 4:22 a.m. | #3
On Sun, Jun 16, 2019 at 7:08 AM Jan Stancek <jstancek@redhat.com> wrote:

>

> ----- Original Message -----

> > +static void setup(void)

> > +{

> > +     sigset_t sigusr1;

> > +     pthread_t defunct_thread;

> > +

> > +     sigemptyset(&sigusr1);

> > +     sigaddset(&sigusr1, SIGUSR1);

> > +     pthread_sigmask(SIG_BLOCK, &sigusr1, NULL);

> > +

> > +     parent_tgid = getpid();

> > +     parent_tid = sys_gettid();

> > +

> > +     SAFE_PTHREAD_CREATE(&child_thread, NULL, child_thread_func, NULL);

> > +

> > +     TST_CHECKPOINT_WAIT(0);

>

> I'm seeing reports of this test failing on s390x:

>

> st_test.c:1096: INFO: Timeout per run is 0h 05m 00s

> tgkill03.c:92: PASS: Invalid tgid failed as expected: EINVAL

> tgkill03.c:92: PASS: Invalid tid failed as expected: EINVAL

> tgkill03.c:92: PASS: Invalid signal failed as expected: EINVAL

> tgkill03.c:96: FAIL: Defunct tid should have failed with ESRCH: SUCCESS

> tgkill03.c:92: PASS: Defunct tgid failed as expected: ESRCH

> tgkill03.c:99: PASS: Valid tgkill call succeeded

>

> and I suspect this piece:

>

> > +

> > +     SAFE_PTHREAD_CREATE(&defunct_thread, NULL, defunct_thread_func,

> NULL);

> > +

> > +     SAFE_PTHREAD_JOIN(defunct_thread, NULL);

> > +}

>

> glibc pthread_join() waits for CLONE_CHILD_CLEARTID to clear tid,

> and then resumes. Which kernel does at:

>   do_exit

>     exit_mm

>       mm_release

>         put_user(0, tsk->clear_child_tid);

>

> so there's still work to be done after that, and I suspect tid is still

> valid

> while that happens.

>

> My first idea: wait until /proc/pid/task/<tid> disappears.

>


The analysis is probably right, but this idea doesn't work for me. It seems
/proc/pid/task/<tid> is not the key to confirming that the tid has been
cleared.

I just gave it a try, as below:

===========
# for i in `seq 1000`; do echo "i = $i" && ./tgkill03 || break; done
...
i = 96
tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s
tgkill03.c:106: FAIL: Defunct tid should have failed with ESRCH: SUCCESS

===========
--- a/testcases/kernel/syscalls/tgkill/tgkill03.c
+++ b/testcases/kernel/syscalls/tgkill/tgkill03.c
@@ -5,6 +5,7 @@
  * Test simple tgkill() error cases.
  */

+#include <stdio.h>
 #include <pthread.h>
 #include <pwd.h>
 #include <sys/types.h>
@@ -19,6 +20,7 @@ static pid_t parent_tgid;
 static pid_t parent_tid;
 static pid_t child_tid;
 static pid_t defunct_tid;
+char buf[1024];

 static const int invalid_pid = -1;

@@ -35,6 +37,8 @@ static void *defunct_thread_func(void *arg)
 {
        defunct_tid = sys_gettid();

+       sprintf(buf, "/proc/pid/task/%d", defunct_tid);
+
        return arg;
 }

@@ -73,18 +77,23 @@ static const struct testcase {
        const int sig;
        const int err;
 } testcases[] = {
-       { "Invalid tgid", &invalid_pid, &parent_tid, SIGUSR1, EINVAL },
-       { "Invalid tid", &parent_tgid, &invalid_pid, SIGUSR1, EINVAL },
-       { "Invalid signal", &parent_tgid, &parent_tid, -1, EINVAL },
+//     { "Invalid tgid", &invalid_pid, &parent_tid, SIGUSR1, EINVAL },
+//     { "Invalid tid", &parent_tgid, &invalid_pid, SIGUSR1, EINVAL },
+//     { "Invalid signal", &parent_tgid, &parent_tid, -1, EINVAL },
        { "Defunct tid", &parent_tgid, &defunct_tid, SIGUSR1, ESRCH },
-       { "Defunct tgid", &defunct_tid, &child_tid, SIGUSR1, ESRCH },
-       { "Valid tgkill call", &parent_tgid, &child_tid, SIGUSR1, 0 },
+//     { "Defunct tgid", &defunct_tid, &child_tid, SIGUSR1, ESRCH },
+//     { "Valid tgkill call", &parent_tgid, &child_tid, SIGUSR1, 0 },
 };

 static void run(unsigned int i)
 {
        const struct testcase *tc = &testcases[i];

+       // debug code //
+       while (access(buf, F_OK) == 0) {
+               tst_res(TINFO, "Debug: %s still exist!", buf);
+       }
+
        TEST(sys_tgkill(*tc->tgid, *tc->tid, tc->sig));
        if (tc->err) {
                if (TST_RET < 0 && TST_ERR == tc->err)


-- 
Regards,
Li Wang
<div dir="ltr"><div dir="ltr"><div class="gmail_default" style="font-size:small"><br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Sun, Jun 16, 2019 at 7:08 AM Jan Stancek &lt;<a href="mailto:jstancek@redhat.com">jstancek@redhat.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br>
----- Original Message -----<br>
&gt; +static void setup(void)<br>
&gt; +{<br>
&gt; +     sigset_t sigusr1;<br>
&gt; +     pthread_t defunct_thread;<br>
&gt; +<br>
&gt; +     sigemptyset(&amp;sigusr1);<br>
&gt; +     sigaddset(&amp;sigusr1, SIGUSR1);<br>
&gt; +     pthread_sigmask(SIG_BLOCK, &amp;sigusr1, NULL);<br>
&gt; +<br>
&gt; +     parent_tgid = getpid();<br>
&gt; +     parent_tid = sys_gettid();<br>
&gt; +<br>
&gt; +     SAFE_PTHREAD_CREATE(&amp;child_thread, NULL, child_thread_func, NULL);<br>
&gt; +<br>
&gt; +     TST_CHECKPOINT_WAIT(0);<br>
<br>
I&#39;m seeing reports of this test failing on s390x:<br>
<br>
st_test.c:1096: INFO: Timeout per run is 0h 05m 00s<br>
tgkill03.c:92: PASS: Invalid tgid failed as expected: EINVAL<br>
tgkill03.c:92: PASS: Invalid tid failed as expected: EINVAL<br>
tgkill03.c:92: PASS: Invalid signal failed as expected: EINVAL<br>
tgkill03.c:96: FAIL: Defunct tid should have failed with ESRCH: SUCCESS<br>
tgkill03.c:92: PASS: Defunct tgid failed as expected: ESRCH<br>
tgkill03.c:99: PASS: Valid tgkill call succeeded<br>
<br>
and I suspect this piece:<br>
<br>
&gt; +<br>
&gt; +     SAFE_PTHREAD_CREATE(&amp;defunct_thread, NULL, defunct_thread_func, NULL);<br>
&gt; +<br>
&gt; +     SAFE_PTHREAD_JOIN(defunct_thread, NULL);<br>
&gt; +}<br>
<br>
glibc pthread_join() waits for CLONE_CHILD_CLEARTID to clear tid,<br>
and then resumes. Which kernel does at:<br>
  do_exit<br>
    exit_mm<br>
      mm_release<br>
        put_user(0, tsk-&gt;clear_child_tid);<br>
<br>
so there&#39;s still work to be done after that, and I suspect tid is still valid<br>
while that happens.<br>
<br>
My first idea: wait until /proc/pid/task/&lt;tid&gt; disappears.<br></blockquote><div><br></div><div class="gmail_default" style="font-size:small">The anaysis is probably right, but this idea doesn&#39;t work for me. Seems /proc/pid/task/&lt;tid&gt; is not the key point to confirm that tid has been clear.</div><div class="gmail_default" style="font-size:small"><br></div><div class="gmail_default" style="font-size:small">I just have a try as below:</div><div class="gmail_default" style="font-size:small"><br></div><div class="gmail_default" style="font-size:small"><div class="gmail_default">===========<br class="gmail-Apple-interchange-newline"># for i in `seq 1000`; do echo &quot;i = $i&quot; &amp;&amp; ./tgkill03 || break; done<br></div><div class="gmail_default">...</div>i = 96<br>tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s<br>tgkill03.c:106: FAIL: Defunct tid should have failed with ESRCH: SUCCESS<br></div><div class="gmail_default" style="font-size:small"><br></div><div class="gmail_default" style="font-size:small">===========</div><div class="gmail_default" style="font-size:small">--- a/testcases/kernel/syscalls/tgkill/tgkill03.c<br>+++ b/testcases/kernel/syscalls/tgkill/tgkill03.c<br>@@ -5,6 +5,7 @@<br>  * Test simple tgkill() error cases.<br>  */<br> <br>+#include &lt;stdio.h&gt;<br> #include &lt;pthread.h&gt;<br> #include &lt;pwd.h&gt;<br> #include &lt;sys/types.h&gt;<br>@@ -19,6 +20,7 @@ static pid_t parent_tgid;<br> static pid_t parent_tid;<br> static pid_t child_tid;<br> static pid_t defunct_tid;<br>+char buf[1024];<br> <br> static const int invalid_pid = -1;<br> <br>@@ -35,6 +37,8 @@ static void *defunct_thread_func(void *arg)<br> {<br>        defunct_tid = sys_gettid();<br> <br>+       sprintf(buf, &quot;/proc/pid/task/%d&quot;, defunct_tid);<br>+<br>        return arg;<br> }<br> <br>@@ -73,18 +77,23 @@ static const struct testcase {<br>        const int sig;<br>        const int err;<br> } testcases[] = {<br>-       { &quot;Invalid tgid&quot;, 
&amp;invalid_pid, &amp;parent_tid, SIGUSR1, EINVAL },<br>-       { &quot;Invalid tid&quot;, &amp;parent_tgid, &amp;invalid_pid, SIGUSR1, EINVAL },<br>-       { &quot;Invalid signal&quot;, &amp;parent_tgid, &amp;parent_tid, -1, EINVAL },<br>+//     { &quot;Invalid tgid&quot;, &amp;invalid_pid, &amp;parent_tid, SIGUSR1, EINVAL },<br>+//     { &quot;Invalid tid&quot;, &amp;parent_tgid, &amp;invalid_pid, SIGUSR1, EINVAL },<br>+//     { &quot;Invalid signal&quot;, &amp;parent_tgid, &amp;parent_tid, -1, EINVAL },<br>        { &quot;Defunct tid&quot;, &amp;parent_tgid, &amp;defunct_tid, SIGUSR1, ESRCH },<br>-       { &quot;Defunct tgid&quot;, &amp;defunct_tid, &amp;child_tid, SIGUSR1, ESRCH },<br>-       { &quot;Valid tgkill call&quot;, &amp;parent_tgid, &amp;child_tid, SIGUSR1, 0 },<br>+//     { &quot;Defunct tgid&quot;, &amp;defunct_tid, &amp;child_tid, SIGUSR1, ESRCH },<br>+//     { &quot;Valid tgkill call&quot;, &amp;parent_tgid, &amp;child_tid, SIGUSR1, 0 },<br> };<br> <br> static void run(unsigned int i)<br> {<br>        const struct testcase *tc = &amp;testcases[i];<br> <br>+       // debug code //<br>+       while (access(buf, F_OK) == 0) {<br>+               tst_res(TINFO, &quot;Debug: %s still exist!&quot;, buf);<br>+       }<br>+<br>        TEST(sys_tgkill(*tc-&gt;tgid, *tc-&gt;tid, tc-&gt;sig));<br>        if (tc-&gt;err) {<br>                if (TST_RET &lt; 0 &amp;&amp; TST_ERR == tc-&gt;err)<br></div><div class="gmail_default" style="font-size:small"><br></div></div><div><br></div>-- <br><div dir="ltr" class="gmail_signature"><div dir="ltr"><div>Regards,<br></div><div>Li Wang<br></div></div></div></div>
Jan Stancek June 16, 2019, 7:04 a.m. | #4
----- Original Message -----
> On Sun, Jun 16, 2019 at 7:08 AM Jan Stancek <jstancek@redhat.com> wrote:
> 
> >
> > ----- Original Message -----
> > > +static void setup(void)
> > > +{
> > > +     sigset_t sigusr1;
> > > +     pthread_t defunct_thread;
> > > +
> > > +     sigemptyset(&sigusr1);
> > > +     sigaddset(&sigusr1, SIGUSR1);
> > > +     pthread_sigmask(SIG_BLOCK, &sigusr1, NULL);
> > > +
> > > +     parent_tgid = getpid();
> > > +     parent_tid = sys_gettid();
> > > +
> > > +     SAFE_PTHREAD_CREATE(&child_thread, NULL, child_thread_func, NULL);
> > > +
> > > +     TST_CHECKPOINT_WAIT(0);
> >
> > I'm seeing reports of this test failing on s390x:
> >
> > st_test.c:1096: INFO: Timeout per run is 0h 05m 00s
> > tgkill03.c:92: PASS: Invalid tgid failed as expected: EINVAL
> > tgkill03.c:92: PASS: Invalid tid failed as expected: EINVAL
> > tgkill03.c:92: PASS: Invalid signal failed as expected: EINVAL
> > tgkill03.c:96: FAIL: Defunct tid should have failed with ESRCH: SUCCESS
> > tgkill03.c:92: PASS: Defunct tgid failed as expected: ESRCH
> > tgkill03.c:99: PASS: Valid tgkill call succeeded
> >
> > and I suspect this piece:
> >
> > > +
> > > +     SAFE_PTHREAD_CREATE(&defunct_thread, NULL, defunct_thread_func,
> > NULL);
> > > +
> > > +     SAFE_PTHREAD_JOIN(defunct_thread, NULL);
> > > +}
> >
> > glibc pthread_join() waits for CLONE_CHILD_CLEARTID to clear tid,
> > and then resumes. Which kernel does at:
> >   do_exit
> >     exit_mm
> >       mm_release
> >         put_user(0, tsk->clear_child_tid);
> >
> > so there's still work to be done after that, and I suspect tid is still
> > valid
> > while that happens.
> >
> > My first idea: wait until /proc/pid/task/<tid> disappears.
> >
> 
> The anaysis is probably right, but this idea doesn't work for me. Seems
> /proc/pid/task/<tid> is not the key point to confirm that tid has been
> clear.
> 
> I just have a try as below:
> 
> ===========
> # for i in `seq 1000`; do echo "i = $i" && ./tgkill03 || break; done
> ...
> i = 96
> tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s
> tgkill03.c:106: FAIL: Defunct tid should have failed with ESRCH: SUCCESS
> 
> ===========
> --- a/testcases/kernel/syscalls/tgkill/tgkill03.c
> +++ b/testcases/kernel/syscalls/tgkill/tgkill03.c
> @@ -5,6 +5,7 @@
>   * Test simple tgkill() error cases.
>   */
> 
> +#include <stdio.h>
>  #include <pthread.h>
>  #include <pwd.h>
>  #include <sys/types.h>
> @@ -19,6 +20,7 @@ static pid_t parent_tgid;
>  static pid_t parent_tid;
>  static pid_t child_tid;
>  static pid_t defunct_tid;
> +char buf[1024];
> 
>  static const int invalid_pid = -1;
> 
> @@ -35,6 +37,8 @@ static void *defunct_thread_func(void *arg)
>  {
>         defunct_tid = sys_gettid();
> 
> +       sprintf(buf, "/proc/pid/task/%d", defunct_tid);

How about?
          sprintf(buf, "/proc/%d/task/%d", getpid(), defunct_tid);
Li Wang June 16, 2019, 7:19 a.m. | #5
On Sun, Jun 16, 2019 at 3:04 PM Jan Stancek <jstancek@redhat.com> wrote:

>

>

> ----- Original Message -----

> > On Sun, Jun 16, 2019 at 7:08 AM Jan Stancek <jstancek@redhat.com> wrote:

> >

> > >

> > > ----- Original Message -----

> > > > +static void setup(void)

> > > > +{

> > > > +     sigset_t sigusr1;

> > > > +     pthread_t defunct_thread;

> > > > +

> > > > +     sigemptyset(&sigusr1);

> > > > +     sigaddset(&sigusr1, SIGUSR1);

> > > > +     pthread_sigmask(SIG_BLOCK, &sigusr1, NULL);

> > > > +

> > > > +     parent_tgid = getpid();

> > > > +     parent_tid = sys_gettid();

> > > > +

> > > > +     SAFE_PTHREAD_CREATE(&child_thread, NULL, child_thread_func,

> NULL);

> > > > +

> > > > +     TST_CHECKPOINT_WAIT(0);

> > >

> > > I'm seeing reports of this test failing on s390x:

> > >

> > > st_test.c:1096: INFO: Timeout per run is 0h 05m 00s

> > > tgkill03.c:92: PASS: Invalid tgid failed as expected: EINVAL

> > > tgkill03.c:92: PASS: Invalid tid failed as expected: EINVAL

> > > tgkill03.c:92: PASS: Invalid signal failed as expected: EINVAL

> > > tgkill03.c:96: FAIL: Defunct tid should have failed with ESRCH: SUCCESS

> > > tgkill03.c:92: PASS: Defunct tgid failed as expected: ESRCH

> > > tgkill03.c:99: PASS: Valid tgkill call succeeded

> > >

> > > and I suspect this piece:

> > >

> > > > +

> > > > +     SAFE_PTHREAD_CREATE(&defunct_thread, NULL, defunct_thread_func,

> > > NULL);

> > > > +

> > > > +     SAFE_PTHREAD_JOIN(defunct_thread, NULL);

> > > > +}

> > >

> > > glibc pthread_join() waits for CLONE_CHILD_CLEARTID to clear tid,

> > > and then resumes. Which kernel does at:

> > >   do_exit

> > >     exit_mm

> > >       mm_release

> > >         put_user(0, tsk->clear_child_tid);

> > >

> > > so there's still work to be done after that, and I suspect tid is still

> > > valid

> > > while that happens.

> > >

> > > My first idea: wait until /proc/pid/task/<tid> disappears.

> > >

> >

> > The anaysis is probably right, but this idea doesn't work for me. Seems

> > /proc/pid/task/<tid> is not the key point to confirm that tid has been

> > clear.

> >

> > I just have a try as below:

> >

> > ===========

> > # for i in `seq 1000`; do echo "i = $i" && ./tgkill03 || break; done

> > ...

> > i = 96

> > tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s

> > tgkill03.c:106: FAIL: Defunct tid should have failed with ESRCH: SUCCESS

> >

> > ===========

> > --- a/testcases/kernel/syscalls/tgkill/tgkill03.c

> > +++ b/testcases/kernel/syscalls/tgkill/tgkill03.c

> > @@ -5,6 +5,7 @@

> >   * Test simple tgkill() error cases.

> >   */

> >

> > +#include <stdio.h>

> >  #include <pthread.h>

> >  #include <pwd.h>

> >  #include <sys/types.h>

> > @@ -19,6 +20,7 @@ static pid_t parent_tgid;

> >  static pid_t parent_tid;

> >  static pid_t child_tid;

> >  static pid_t defunct_tid;

> > +char buf[1024];

> >

> >  static const int invalid_pid = -1;

> >

> > @@ -35,6 +37,8 @@ static void *defunct_thread_func(void *arg)

> >  {

> >         defunct_tid = sys_gettid();

> >

> > +       sprintf(buf, "/proc/pid/task/%d", defunct_tid);

>

> How about?

>           sprintf(buf, "/proc/%d/task/%d", getpid(), defunct_tid);

>

>

Ah, I was stupid to leave out the getpid() :-).

This looks pretty good. The test falls into the while loop while defunct_tid
still exists, and gets the expected result once /proc/pid/task/<tid>
disappears.

# for i in `seq 1000`; do echo "i = $i" && ./tgkill03 || break; done
...
i = 997
tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!
tgkill03.c:102: PASS: Defunct tgid failed as expected: ESRCH

Summary:
passed   1
failed   0
skipped  0
warnings 0
...
i = 1000
tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s
tgkill03.c:102: PASS: Defunct tgid failed as expected: ESRCH

Summary:
passed   1
failed   0
skipped  0
warnings 0

-- 
Regards,
Li Wang
<div dir="ltr"><div dir="ltr"><div class="gmail_default" style="font-size:small"><br></div></div><br><div class="gmail_quote"><div dir="ltr" class="gmail_attr">On Sun, Jun 16, 2019 at 3:04 PM Jan Stancek &lt;<a href="mailto:jstancek@redhat.com">jstancek@redhat.com</a>&gt; wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex"><br>
<br>
----- Original Message -----<br>
&gt; On Sun, Jun 16, 2019 at 7:08 AM Jan Stancek &lt;<a href="mailto:jstancek@redhat.com" target="_blank">jstancek@redhat.com</a>&gt; wrote:<br>
&gt; <br>
&gt; &gt;<br>
&gt; &gt; ----- Original Message -----<br>
&gt; &gt; &gt; +static void setup(void)<br>
&gt; &gt; &gt; +{<br>
&gt; &gt; &gt; +     sigset_t sigusr1;<br>
&gt; &gt; &gt; +     pthread_t defunct_thread;<br>
&gt; &gt; &gt; +<br>
&gt; &gt; &gt; +     sigemptyset(&amp;sigusr1);<br>
&gt; &gt; &gt; +     sigaddset(&amp;sigusr1, SIGUSR1);<br>
&gt; &gt; &gt; +     pthread_sigmask(SIG_BLOCK, &amp;sigusr1, NULL);<br>
&gt; &gt; &gt; +<br>
&gt; &gt; &gt; +     parent_tgid = getpid();<br>
&gt; &gt; &gt; +     parent_tid = sys_gettid();<br>
&gt; &gt; &gt; +<br>
&gt; &gt; &gt; +     SAFE_PTHREAD_CREATE(&amp;child_thread, NULL, child_thread_func, NULL);<br>
&gt; &gt; &gt; +<br>
&gt; &gt; &gt; +     TST_CHECKPOINT_WAIT(0);<br>
&gt; &gt;<br>
&gt; &gt; I&#39;m seeing reports of this test failing on s390x:<br>
&gt; &gt;<br>
&gt; &gt; st_test.c:1096: INFO: Timeout per run is 0h 05m 00s<br>
&gt; &gt; tgkill03.c:92: PASS: Invalid tgid failed as expected: EINVAL<br>
&gt; &gt; tgkill03.c:92: PASS: Invalid tid failed as expected: EINVAL<br>
&gt; &gt; tgkill03.c:92: PASS: Invalid signal failed as expected: EINVAL<br>
&gt; &gt; tgkill03.c:96: FAIL: Defunct tid should have failed with ESRCH: SUCCESS<br>
&gt; &gt; tgkill03.c:92: PASS: Defunct tgid failed as expected: ESRCH<br>
&gt; &gt; tgkill03.c:99: PASS: Valid tgkill call succeeded<br>
&gt; &gt;<br>
&gt; &gt; and I suspect this piece:<br>
&gt; &gt;<br>
&gt; &gt; &gt; +<br>
&gt; &gt; &gt; +     SAFE_PTHREAD_CREATE(&amp;defunct_thread, NULL, defunct_thread_func,<br>
&gt; &gt; NULL);<br>
&gt; &gt; &gt; +<br>
&gt; &gt; &gt; +     SAFE_PTHREAD_JOIN(defunct_thread, NULL);<br>
&gt; &gt; &gt; +}<br>
&gt; &gt;<br>
&gt; &gt; glibc pthread_join() waits for CLONE_CHILD_CLEARTID to clear tid,<br>
&gt; &gt; and then resumes. Which kernel does at:<br>
&gt; &gt;   do_exit<br>
&gt; &gt;     exit_mm<br>
&gt; &gt;       mm_release<br>
&gt; &gt;         put_user(0, tsk-&gt;clear_child_tid);<br>
&gt; &gt;<br>
&gt; &gt; so there&#39;s still work to be done after that, and I suspect tid is still<br>
&gt; &gt; valid<br>
&gt; &gt; while that happens.<br>
&gt; &gt;<br>
&gt; &gt; My first idea: wait until /proc/pid/task/&lt;tid&gt; disappears.<br>
&gt; &gt;<br>
&gt; <br>
&gt; The anaysis is probably right, but this idea doesn&#39;t work for me. Seems<br>
&gt; /proc/pid/task/&lt;tid&gt; is not the key point to confirm that tid has been<br>
&gt; clear.<br>
&gt; <br>
&gt; I just have a try as below:<br>
&gt; <br>
&gt; ===========<br>
&gt; # for i in `seq 1000`; do echo &quot;i = $i&quot; &amp;&amp; ./tgkill03 || break; done<br>
&gt; ...<br>
&gt; i = 96<br>
&gt; tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s<br>
&gt; tgkill03.c:106: FAIL: Defunct tid should have failed with ESRCH: SUCCESS<br>
&gt; <br>
&gt; ===========<br>
&gt; --- a/testcases/kernel/syscalls/tgkill/tgkill03.c<br>
&gt; +++ b/testcases/kernel/syscalls/tgkill/tgkill03.c<br>
&gt; @@ -5,6 +5,7 @@<br>
&gt;   * Test simple tgkill() error cases.<br>
&gt;   */<br>
&gt; <br>
&gt; +#include &lt;stdio.h&gt;<br>
&gt;  #include &lt;pthread.h&gt;<br>
&gt;  #include &lt;pwd.h&gt;<br>
&gt;  #include &lt;sys/types.h&gt;<br>
&gt; @@ -19,6 +20,7 @@ static pid_t parent_tgid;<br>
&gt;  static pid_t parent_tid;<br>
&gt;  static pid_t child_tid;<br>
&gt;  static pid_t defunct_tid;<br>
&gt; +char buf[1024];<br>
&gt; <br>
&gt;  static const int invalid_pid = -1;<br>
&gt; <br>
&gt; @@ -35,6 +37,8 @@ static void *defunct_thread_func(void *arg)<br>
&gt;  {<br>
&gt;         defunct_tid = sys_gettid();<br>
&gt; <br>
&gt; +       sprintf(buf, &quot;/proc/pid/task/%d&quot;, defunct_tid);<br>
<br>
How about?<br>
          sprintf(buf, &quot;/proc/%d/task/%d&quot;, getpid(), defunct_tid);<br>
<br></blockquote><div><br></div><div class="gmail_default" style="font-size:small">Ah, I was stupid to lost the getpid :-).</div><div class="gmail_default" style="font-size:small"><br></div><div class="gmail_default" style="font-size:small">This look prettry good. The test fall into while loop when defunct_tid is still exit, and get the expected result once /proc/pid/task/&lt;tid&gt; disappears.</div><div class="gmail_default" style="font-size:small"><br></div><div class="gmail_default" style="font-size:small"># for i in `seq 1000`; do echo &quot;i = $i&quot; &amp;&amp; ./tgkill03 || break; done<br></div><div class="gmail_default" style="font-size:small">...</div><div class="gmail_default" style="font-size:small">i = 997<br>tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s<br></div><span class="gmail_default" style="font-size:small"></span>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: 
/proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:95: INFO: Debug: /proc/5164/task/5166 still exist!<br>tgkill03.c:102: PASS: Defunct tgid failed as expected: ESRCH<br><br>Summary:<br>passed   1<br>failed   0<br>skipped  0<br>warnings 0</div><div class="gmail_quote"><div class="gmail_default" style="font-size:small"></div><div class="gmail_default" style="font-size:small">...</div><div class="gmail_default" style="font-size:small">i = 1000</div>tst_test.c:1112: INFO: Timeout per run is 0h 05m 00s<br>tgkill03.c:102: PASS: Defunct tgid failed as expected: ESRCH<br><br>Summary:<br>passed   1<br>failed   0<br>skipped  0<br>warnings 0</div><div><br></div>-- <br><div dir="ltr" class="gmail_signature"><div dir="ltr"><div>Regards,<br></div><div>Li Wang<br></div></div></div></div>

Patch

diff --git a/runtest/syscalls b/runtest/syscalls
index 7af9136..b090408 100644
--- a/runtest/syscalls
+++ b/runtest/syscalls
@@ -1402,6 +1402,7 @@  syslog12 syslog12
 
 tgkill01 tgkill01
 tgkill02 tgkill02
+tgkill03 tgkill03
 
 time01 time01
 time02 time02
diff --git a/testcases/kernel/syscalls/tgkill/.gitignore b/testcases/kernel/syscalls/tgkill/.gitignore
index 42be2bb..a6d2299 100644
--- a/testcases/kernel/syscalls/tgkill/.gitignore
+++ b/testcases/kernel/syscalls/tgkill/.gitignore
@@ -1,2 +1,3 @@ 
 tgkill01
 tgkill02
+tgkill03
diff --git a/testcases/kernel/syscalls/tgkill/tgkill03.c b/testcases/kernel/syscalls/tgkill/tgkill03.c
new file mode 100644
index 0000000..b78e9d9
--- /dev/null
+++ b/testcases/kernel/syscalls/tgkill/tgkill03.c
@@ -0,0 +1,110 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018 Google, Inc.
+ *
+ * Test simple tgkill() error cases.
+ */
+
+#include <pthread.h>
+#include <pwd.h>
+#include <sys/types.h>
+
+#include "tst_safe_pthread.h"
+#include "tst_test.h"
+#include "tgkill.h"
+
+static pthread_t child_thread;
+
+static pid_t parent_tgid;
+static pid_t parent_tid;
+static pid_t child_tid;
+static pid_t defunct_tid;
+
+static const int invalid_pid = -1;
+
+static void *child_thread_func(void *arg)
+{
+	child_tid = sys_gettid();
+
+	TST_CHECKPOINT_WAKE_AND_WAIT(0);
+
+	return arg;
+}
+
+static void *defunct_thread_func(void *arg)
+{
+	defunct_tid = sys_gettid();
+
+	return arg;
+}
+
+static void setup(void)
+{
+	sigset_t sigusr1;
+	pthread_t defunct_thread;
+
+	sigemptyset(&sigusr1);
+	sigaddset(&sigusr1, SIGUSR1);
+	pthread_sigmask(SIG_BLOCK, &sigusr1, NULL);
+
+	parent_tgid = getpid();
+	parent_tid = sys_gettid();
+
+	SAFE_PTHREAD_CREATE(&child_thread, NULL, child_thread_func, NULL);
+
+	TST_CHECKPOINT_WAIT(0);
+
+	SAFE_PTHREAD_CREATE(&defunct_thread, NULL, defunct_thread_func, NULL);
+
+	SAFE_PTHREAD_JOIN(defunct_thread, NULL);
+}
+
+static void cleanup(void)
+{
+	TST_CHECKPOINT_WAKE(0);
+
+	SAFE_PTHREAD_JOIN(child_thread, NULL);
+}
+
+static const struct testcase {
+	const char *desc;
+	const int *tgid;
+	const int *tid;
+	const int sig;
+	const int err;
+} testcases[] = {
+	{ "Invalid tgid", &invalid_pid, &parent_tid, SIGUSR1, EINVAL },
+	{ "Invalid tid", &parent_tgid, &invalid_pid, SIGUSR1, EINVAL },
+	{ "Invalid signal", &parent_tgid, &parent_tid, -1, EINVAL },
+	{ "Defunct thread ID", &parent_tgid, &defunct_tid, SIGUSR1, ESRCH },
+	{ "Valid tgkill call", &parent_tgid, &child_tid, SIGUSR1, 0 },
+};
+
+static void run(unsigned int i)
+{
+	const struct testcase *tc = &testcases[i];
+
+	TEST(sys_tgkill(*tc->tgid, *tc->tid, tc->sig));
+	if (tc->err) {
+		if (TST_RET < 0 && TST_ERR == tc->err)
+			tst_res(TPASS | TTERRNO, "%s failed as expected",
+				tc->desc);
+		else
+			tst_res(TFAIL | TTERRNO,
+				"%s should have failed with %s", tc->desc,
+				tst_strerrno(tc->err));
+	} else {
+		if (TST_RET == 0)
+			tst_res(TPASS, "%s succeeded", tc->desc);
+		else
+			tst_res(TFAIL | TTERRNO, "%s failed", tc->desc);
+	}
+}
+
+static struct tst_test test = {
+	.tcnt = ARRAY_SIZE(testcases),
+	.needs_checkpoints = 1,
+	.setup = setup,
+	.cleanup = cleanup,
+	.test = run,
+};