diff mbox series

[RFC,PATCH-for-8.2?] accel/tcg: Implement tcg_unregister_thread()

Message ID 20231204194039.56169-1-philmd@linaro.org
State New
Headers show
Series [RFC,PATCH-for-8.2?] accel/tcg: Implement tcg_unregister_thread() | expand

Commit Message

Philippe Mathieu-Daudé Dec. 4, 2023, 7:40 p.m. UTC
Unplugging vCPU triggers the following assertion in
tcg_register_thread():

 796 void tcg_register_thread(void)
 797 {
 ...
 812     /* Claim an entry in tcg_ctxs */
 813     n = qatomic_fetch_inc(&tcg_cur_ctxs);
 814     g_assert(n < tcg_max_ctxs);

Implement and use tcg_unregister_thread() so when a
vCPU is unplugged, the tcg_cur_ctxs refcount is
decremented.

Reported-by: Michal Suchánek <msuchanek@suse.de>
Suggested-by: Stefan Hajnoczi <stefanha@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
---
RFC: untested
Report: https://lore.kernel.org/qemu-devel/20231204183638.GZ9696@kitsune.suse.cz/
---
 include/tcg/startup.h           |  5 +++++
 accel/tcg/tcg-accel-ops-mttcg.c |  1 +
 accel/tcg/tcg-accel-ops-rr.c    |  1 +
 tcg/tcg.c                       | 17 +++++++++++++++++
 4 files changed, 24 insertions(+)

Comments

Stefan Hajnoczi Dec. 4, 2023, 7:50 p.m. UTC | #1
On Mon, 4 Dec 2023 at 14:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>
> Unplugging vCPU triggers the following assertion in

Unplugging leaks the tcg context refcount but does not trigger the
assertion directly. Maybe clarify that by changing the wording:

"Plugging a vCPU after it has been unplugged triggers..."

> tcg_register_thread():
>
>  796 void tcg_register_thread(void)
>  797 {
>  ...
>  812     /* Claim an entry in tcg_ctxs */
>  813     n = qatomic_fetch_inc(&tcg_cur_ctxs);
>  814     g_assert(n < tcg_max_ctxs);
>
> Implement and use tcg_unregister_thread() so when a
> vCPU is unplugged, the tcg_cur_ctxs refcount is
> decremented.
>
> Reported-by: Michal Suchánek <msuchanek@suse.de>
> Suggested-by: Stefan Hajnoczi <stefanha@gmail.com>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
> RFC: untested
> Report: https://lore.kernel.org/qemu-devel/20231204183638.GZ9696@kitsune.suse.cz/
> ---
>  include/tcg/startup.h           |  5 +++++
>  accel/tcg/tcg-accel-ops-mttcg.c |  1 +
>  accel/tcg/tcg-accel-ops-rr.c    |  1 +
>  tcg/tcg.c                       | 17 +++++++++++++++++
>  4 files changed, 24 insertions(+)
>
> diff --git a/include/tcg/startup.h b/include/tcg/startup.h
> index f71305765c..520942a4a1 100644
> --- a/include/tcg/startup.h
> +++ b/include/tcg/startup.h
> @@ -45,6 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
>   */
>  void tcg_register_thread(void);
>
> +/**
> + * tcg_unregister_thread: Unregister this thread with the TCG runtime
> + */
> +void tcg_unregister_thread(void);
> +
>  /**
>   * tcg_prologue_init(): Generate the code for the TCG prologue
>   *
> diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
> index fac80095bb..88d7427aad 100644
> --- a/accel/tcg/tcg-accel-ops-mttcg.c
> +++ b/accel/tcg/tcg-accel-ops-mttcg.c
> @@ -120,6 +120,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
>
>      tcg_cpus_destroy(cpu);
>      qemu_mutex_unlock_iothread();
> +    tcg_unregister_thread();
>      rcu_remove_force_rcu_notifier(&force_rcu.notifier);
>      rcu_unregister_thread();
>      return NULL;
> diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
> index 611932f3c3..c2af3aad21 100644
> --- a/accel/tcg/tcg-accel-ops-rr.c
> +++ b/accel/tcg/tcg-accel-ops-rr.c
> @@ -302,6 +302,7 @@ static void *rr_cpu_thread_fn(void *arg)
>          rr_deal_with_unplugged_cpus();
>      }
>
> +    tcg_unregister_thread();
>      rcu_remove_force_rcu_notifier(&force_rcu);
>      rcu_unregister_thread();
>      return NULL;
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index d2ea22b397..5125342d70 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -781,11 +781,18 @@ static void alloc_tcg_plugin_context(TCGContext *s)
>   * modes.
>   */
>  #ifdef CONFIG_USER_ONLY
> +
>  void tcg_register_thread(void)
>  {
>      tcg_ctx = &tcg_init_ctx;
>  }
> +
> +void tcg_unregister_thread(void)
> +{
> +}
> +
>  #else
> +
>  void tcg_register_thread(void)
>  {
>      TCGContext *s = g_malloc(sizeof(*s));
> @@ -814,6 +821,16 @@ void tcg_register_thread(void)
>
>      tcg_ctx = s;
>  }
> +
> +void tcg_unregister_thread(void)
> +{
> +    unsigned int n;
> +
> +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
> +    g_free(tcg_ctxs[n]);
> +    qatomic_set(&tcg_ctxs[n], NULL);
> +}

tcg_ctxs[n] may not be our context, so this looks like it could free
another thread's context and lead to undefined behavior.

I haven't read the code so I can't suggest an alternative myself.

Stefan

> +
>  #endif /* !CONFIG_USER_ONLY */
>
>  /* pool based memory allocation */
> --
> 2.41.0
>
Michal Suchánek Dec. 4, 2023, 8:09 p.m. UTC | #2
On Mon, Dec 04, 2023 at 02:50:17PM -0500, Stefan Hajnoczi wrote:
> On Mon, 4 Dec 2023 at 14:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
> >
> > Unplugging vCPU triggers the following assertion in
> 
> Unplugging leaks the tcg context refcount but does not trigger the
> assertion directly. Maybe clarify that by changing the wording:
> 
> "Plugging a vCPU after it has been unplugged triggers..."
> 
> > tcg_register_thread():
> >
> >  796 void tcg_register_thread(void)
> >  797 {
> >  ...
> >  812     /* Claim an entry in tcg_ctxs */
> >  813     n = qatomic_fetch_inc(&tcg_cur_ctxs);
> >  814     g_assert(n < tcg_max_ctxs);
> >
> > Implement and use tcg_unregister_thread() so when a
> > vCPU is unplugged, the tcg_cur_ctxs refcount is
> > decremented.
> >
> > Reported-by: Michal Suchánek <msuchanek@suse.de>
> > Suggested-by: Stefan Hajnoczi <stefanha@gmail.com>
> > Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> > ---
> > RFC: untested
> > Report: https://lore.kernel.org/qemu-devel/20231204183638.GZ9696@kitsune.suse.cz/
> > ---
> >  include/tcg/startup.h           |  5 +++++
> >  accel/tcg/tcg-accel-ops-mttcg.c |  1 +
> >  accel/tcg/tcg-accel-ops-rr.c    |  1 +
> >  tcg/tcg.c                       | 17 +++++++++++++++++
> >  4 files changed, 24 insertions(+)
> >
> > diff --git a/include/tcg/startup.h b/include/tcg/startup.h
> > index f71305765c..520942a4a1 100644
> > --- a/include/tcg/startup.h
> > +++ b/include/tcg/startup.h
> > @@ -45,6 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
> >   */
> >  void tcg_register_thread(void);
> >
> > +/**
> > + * tcg_unregister_thread: Unregister this thread with the TCG runtime
> > + */
> > +void tcg_unregister_thread(void);
> > +
> >  /**
> >   * tcg_prologue_init(): Generate the code for the TCG prologue
> >   *
> > diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
> > index fac80095bb..88d7427aad 100644
> > --- a/accel/tcg/tcg-accel-ops-mttcg.c
> > +++ b/accel/tcg/tcg-accel-ops-mttcg.c
> > @@ -120,6 +120,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
> >
> >      tcg_cpus_destroy(cpu);
> >      qemu_mutex_unlock_iothread();
> > +    tcg_unregister_thread();
> >      rcu_remove_force_rcu_notifier(&force_rcu.notifier);
> >      rcu_unregister_thread();
> >      return NULL;
> > diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
> > index 611932f3c3..c2af3aad21 100644
> > --- a/accel/tcg/tcg-accel-ops-rr.c
> > +++ b/accel/tcg/tcg-accel-ops-rr.c
> > @@ -302,6 +302,7 @@ static void *rr_cpu_thread_fn(void *arg)
> >          rr_deal_with_unplugged_cpus();
> >      }
> >
> > +    tcg_unregister_thread();
> >      rcu_remove_force_rcu_notifier(&force_rcu);
> >      rcu_unregister_thread();
> >      return NULL;
> > diff --git a/tcg/tcg.c b/tcg/tcg.c
> > index d2ea22b397..5125342d70 100644
> > --- a/tcg/tcg.c
> > +++ b/tcg/tcg.c
> > @@ -781,11 +781,18 @@ static void alloc_tcg_plugin_context(TCGContext *s)
> >   * modes.
> >   */
> >  #ifdef CONFIG_USER_ONLY
> > +
> >  void tcg_register_thread(void)
> >  {
> >      tcg_ctx = &tcg_init_ctx;
> >  }
> > +
> > +void tcg_unregister_thread(void)
> > +{
> > +}
> > +
> >  #else
> > +
> >  void tcg_register_thread(void)
> >  {
> >      TCGContext *s = g_malloc(sizeof(*s));
> > @@ -814,6 +821,16 @@ void tcg_register_thread(void)
> >
> >      tcg_ctx = s;
> >  }
> > +
> > +void tcg_unregister_thread(void)
> > +{
> > +    unsigned int n;
> > +
> > +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
> > +    g_free(tcg_ctxs[n]);
> > +    qatomic_set(&tcg_ctxs[n], NULL);
> > +}
> 
> tcg_ctxs[n] may not be our context, so this looks like it could free
> another thread's context and lead to undefined behavior.

There is cpu->thread_id so perhaps cpu->thread_ctx could be added as
well. That would require a bitmap of used threads contexts rather than a
counter, though.

Thanks

Michal
Richard Henderson Dec. 4, 2023, 11:02 p.m. UTC | #3
On 12/4/23 12:09, Michal Suchánek wrote:
> On Mon, Dec 04, 2023 at 02:50:17PM -0500, Stefan Hajnoczi wrote:
>> On Mon, 4 Dec 2023 at 14:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>>> +void tcg_unregister_thread(void)
>>> +{
>>> +    unsigned int n;
>>> +
>>> +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
>>> +    g_free(tcg_ctxs[n]);
>>> +    qatomic_set(&tcg_ctxs[n], NULL);
>>> +}
>>
>> tcg_ctxs[n] may not be our context, so this looks like it could free
>> another thread's context and lead to undefined behavior.

Correct.

> There is cpu->thread_id so perhaps cpu->thread_ctx could be added as
> well. That would require a bitmap of used threads contexts rather than a
> counter, though.

Or don't free the context at all, but re-use it when incrementing and tcg_ctxs[n] != null 
(i.e. plugging in a repacement vcpu).  After all, there can only be tcg_max_ctxs contexts.


r~
Michal Suchánek Dec. 5, 2023, 10:09 a.m. UTC | #4
On Mon, Dec 04, 2023 at 03:02:45PM -0800, Richard Henderson wrote:
> On 12/4/23 12:09, Michal Suchánek wrote:
> > On Mon, Dec 04, 2023 at 02:50:17PM -0500, Stefan Hajnoczi wrote:
> > > On Mon, 4 Dec 2023 at 14:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
> > > > +void tcg_unregister_thread(void)
> > > > +{
> > > > +    unsigned int n;
> > > > +
> > > > +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
> > > > +    g_free(tcg_ctxs[n]);
> > > > +    qatomic_set(&tcg_ctxs[n], NULL);
> > > > +}
> > > 
> > > tcg_ctxs[n] may not be our context, so this looks like it could free
> > > another thread's context and lead to undefined behavior.
> 
> Correct.
> 
> > There is cpu->thread_id so perhaps cpu->thread_ctx could be added as
> > well. That would require a bitmap of used threads contexts rather than a
> > counter, though.
> 
> Or don't free the context at all, but re-use it when incrementing and
> tcg_ctxs[n] != null (i.e. plugging in a repacement vcpu).  After all, there
> can only be tcg_max_ctxs contexts.

But you would not know which contexts are in use and which aren't without
tracking the association of contexts to CPUs.

Unless there is a cpu array somewhere and you can use the same index for
both to create the association.

Thanks

Michal
Michal Suchánek Dec. 6, 2023, 12:56 p.m. UTC | #5
On Tue, Dec 05, 2023 at 11:09:59AM +0100, Michal Suchánek wrote:
> On Mon, Dec 04, 2023 at 03:02:45PM -0800, Richard Henderson wrote:
> > On 12/4/23 12:09, Michal Suchánek wrote:
> > > On Mon, Dec 04, 2023 at 02:50:17PM -0500, Stefan Hajnoczi wrote:
> > > > On Mon, 4 Dec 2023 at 14:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
> > > > > +void tcg_unregister_thread(void)
> > > > > +{
> > > > > +    unsigned int n;
> > > > > +
> > > > > +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
> > > > > +    g_free(tcg_ctxs[n]);
> > > > > +    qatomic_set(&tcg_ctxs[n], NULL);
> > > > > +}
> > > > 
> > > > tcg_ctxs[n] may not be our context, so this looks like it could free
> > > > another thread's context and lead to undefined behavior.
> > 
> > Correct.
> > 
> > > There is cpu->thread_id so perhaps cpu->thread_ctx could be added as
> > > well. That would require a bitmap of used threads contexts rather than a
> > > counter, though.
> > 
> > Or don't free the context at all, but re-use it when incrementing and
> > tcg_ctxs[n] != null (i.e. plugging in a repacement vcpu).  After all, there
> > can only be tcg_max_ctxs contexts.
> 
> But you would not know which contexts are in use and which aren't without
> tracking the association of contexts to CPUs.
> 
> Unless there is a cpu array somewhere and you can use the same index for
> both to create the association.

I tried to use cpu_index for correlating the tcg_ctx with cpu. I added
some asserts that only null contexts are allocated and non-null contexts
released but qemu crashes somewhere in tcg sometime after the guest gets
to switch root.

Thanks

Michal
Miguel Luis Dec. 6, 2023, 2:17 p.m. UTC | #6
Hi!

On 04/12/2023 18:40, Philippe Mathieu-Daudé wrote:
> Unplugging vCPU triggers the following assertion in
> tcg_register_thread():
>
>  796 void tcg_register_thread(void)
>  797 {
>  ...
>  812     /* Claim an entry in tcg_ctxs */
>  813     n = qatomic_fetch_inc(&tcg_cur_ctxs);
>  814     g_assert(n < tcg_max_ctxs);
>
> Implement and use tcg_unregister_thread() so when a
> vCPU is unplugged, the tcg_cur_ctxs refcount is
> decremented.


I've had addressed this issue before (posted at [1]) and had exercised
it with vCPU hotplug/unplug patches for ARM although I was not sure about what
would be needed to be done regarding plugins on the context of
tcg_unregister_thread. I guess they would need to be freed also?


> Reported-by: Michal Suchánek <msuchanek@suse.de>
> Suggested-by: Stefan Hajnoczi <stefanha@gmail.com>
> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
> RFC: untested
> Report: https://lore.kernel.org/qemu-devel/20231204183638.GZ9696@kitsune.suse.cz/
> ---
>  include/tcg/startup.h           |  5 +++++
>  accel/tcg/tcg-accel-ops-mttcg.c |  1 +
>  accel/tcg/tcg-accel-ops-rr.c    |  1 +
>  tcg/tcg.c                       | 17 +++++++++++++++++
>  4 files changed, 24 insertions(+)
>
> diff --git a/include/tcg/startup.h b/include/tcg/startup.h
> index f71305765c..520942a4a1 100644
> --- a/include/tcg/startup.h
> +++ b/include/tcg/startup.h
> @@ -45,6 +45,11 @@ void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
>   */
>  void tcg_register_thread(void);
>  
> +/**
> + * tcg_unregister_thread: Unregister this thread with the TCG runtime
> + */
> +void tcg_unregister_thread(void);
> +
>  /**
>   * tcg_prologue_init(): Generate the code for the TCG prologue
>   *
> diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
> index fac80095bb..88d7427aad 100644
> --- a/accel/tcg/tcg-accel-ops-mttcg.c
> +++ b/accel/tcg/tcg-accel-ops-mttcg.c
> @@ -120,6 +120,7 @@ static void *mttcg_cpu_thread_fn(void *arg)
>  
>      tcg_cpus_destroy(cpu);
>      qemu_mutex_unlock_iothread();
> +    tcg_unregister_thread();
>      rcu_remove_force_rcu_notifier(&force_rcu.notifier);
>      rcu_unregister_thread();
>      return NULL;
> diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
> index 611932f3c3..c2af3aad21 100644
> --- a/accel/tcg/tcg-accel-ops-rr.c
> +++ b/accel/tcg/tcg-accel-ops-rr.c
> @@ -302,6 +302,7 @@ static void *rr_cpu_thread_fn(void *arg)
>          rr_deal_with_unplugged_cpus();
>      }
>  
> +    tcg_unregister_thread();
>      rcu_remove_force_rcu_notifier(&force_rcu);
>      rcu_unregister_thread();
>      return NULL;
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index d2ea22b397..5125342d70 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -781,11 +781,18 @@ static void alloc_tcg_plugin_context(TCGContext *s)
>   * modes.
>   */
>  #ifdef CONFIG_USER_ONLY
> +
>  void tcg_register_thread(void)
>  {
>      tcg_ctx = &tcg_init_ctx;
>  }
> +
> +void tcg_unregister_thread(void)
> +{
> +}
> +
>  #else
> +
>  void tcg_register_thread(void)
>  {
>      TCGContext *s = g_malloc(sizeof(*s));
> @@ -814,6 +821,16 @@ void tcg_register_thread(void)
>  
>      tcg_ctx = s;
>  }
> +
> +void tcg_unregister_thread(void)
> +{
> +    unsigned int n;
> +
> +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
> +    g_free(tcg_ctxs[n]);


Is the above off-by-one?


> +    qatomic_set(&tcg_ctxs[n], NULL);
> +}
> +

Thank you

Miguel

[1]: https://lore.kernel.org/qemu-devel/20230926103654.34424-5-salil.mehta@huawei.com/


>  #endif /* !CONFIG_USER_ONLY */
>  
>  /* pool based memory allocation */
Philippe Mathieu-Daudé Dec. 6, 2023, 2:29 p.m. UTC | #7
Hi Stefan,

On 6/12/23 13:56, Michal Suchánek wrote:
> On Tue, Dec 05, 2023 at 11:09:59AM +0100, Michal Suchánek wrote:
>> On Mon, Dec 04, 2023 at 03:02:45PM -0800, Richard Henderson wrote:
>>> On 12/4/23 12:09, Michal Suchánek wrote:
>>>> On Mon, Dec 04, 2023 at 02:50:17PM -0500, Stefan Hajnoczi wrote:
>>>>> On Mon, 4 Dec 2023 at 14:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>>>>>> +void tcg_unregister_thread(void)
>>>>>> +{
>>>>>> +    unsigned int n;
>>>>>> +
>>>>>> +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
>>>>>> +    g_free(tcg_ctxs[n]);
>>>>>> +    qatomic_set(&tcg_ctxs[n], NULL);
>>>>>> +}
>>>>>
>>>>> tcg_ctxs[n] may not be our context, so this looks like it could free
>>>>> another thread's context and lead to undefined behavior.
>>>
>>> Correct.
>>>
>>>> There is cpu->thread_id so perhaps cpu->thread_ctx could be added as
>>>> well. That would require a bitmap of used threads contexts rather than a
>>>> counter, though.
>>>
>>> Or don't free the context at all, but re-use it when incrementing and
>>> tcg_ctxs[n] != null (i.e. plugging in a repacement vcpu).  After all, there
>>> can only be tcg_max_ctxs contexts.
>>
>> But you would not know which contexts are in use and which aren't without
>> tracking the association of contexts to CPUs.
>>
>> Unless there is a cpu array somewhere and you can use the same index for
>> both to create the association.
> 
> I tried to use cpu_index for correlating the tcg_ctx with cpu. I added
> some asserts that only null contexts are allocated and non-null contexts
> released but qemu crashes somewhere in tcg sometime after the guest gets
> to switch root.

Since this isn't trivial and is a long standing issue, let's not
block the 8.2 release with it.

Regards,

Phil.
Stefan Hajnoczi Dec. 6, 2023, 3:15 p.m. UTC | #8
On Wed, 6 Dec 2023 at 09:29, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
>
> Hi Stefan,
>
> On 6/12/23 13:56, Michal Suchánek wrote:
> > On Tue, Dec 05, 2023 at 11:09:59AM +0100, Michal Suchánek wrote:
> >> On Mon, Dec 04, 2023 at 03:02:45PM -0800, Richard Henderson wrote:
> >>> On 12/4/23 12:09, Michal Suchánek wrote:
> >>>> On Mon, Dec 04, 2023 at 02:50:17PM -0500, Stefan Hajnoczi wrote:
> >>>>> On Mon, 4 Dec 2023 at 14:40, Philippe Mathieu-Daudé <philmd@linaro.org> wrote:
> >>>>>> +void tcg_unregister_thread(void)
> >>>>>> +{
> >>>>>> +    unsigned int n;
> >>>>>> +
> >>>>>> +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
> >>>>>> +    g_free(tcg_ctxs[n]);
> >>>>>> +    qatomic_set(&tcg_ctxs[n], NULL);
> >>>>>> +}
> >>>>>
> >>>>> tcg_ctxs[n] may not be our context, so this looks like it could free
> >>>>> another thread's context and lead to undefined behavior.
> >>>
> >>> Correct.
> >>>
> >>>> There is cpu->thread_id so perhaps cpu->thread_ctx could be added as
> >>>> well. That would require a bitmap of used threads contexts rather than a
> >>>> counter, though.
> >>>
> >>> Or don't free the context at all, but re-use it when incrementing and
> >>> tcg_ctxs[n] != null (i.e. plugging in a repacement vcpu).  After all, there
> >>> can only be tcg_max_ctxs contexts.
> >>
> >> But you would not know which contexts are in use and which aren't without
> >> tracking the association of contexts to CPUs.
> >>
> >> Unless there is a cpu array somewhere and you can use the same index for
> >> both to create the association.
> >
> > I tried to use cpu_index for correlating the tcg_ctx with cpu. I added
> > some asserts that only null contexts are allocated and non-null contexts
> > released but qemu crashes somewhere in tcg sometime after the guest gets
> > to switch root.
>
> Since this isn't trivial and is a long standing issue, let's not
> block the 8.2 release with it.

Sounds good.

Thanks,
Stefan
Michal Suchánek Dec. 6, 2023, 3:25 p.m. UTC | #9
On Wed, Dec 06, 2023 at 01:17:08PM -0100, Miguel Luis wrote:
> Hi!
> 
> On 04/12/2023 18:40, Philippe Mathieu-Daudé wrote:
> > Unplugging vCPU triggers the following assertion in
> > tcg_register_thread():
> >
> >  796 void tcg_register_thread(void)
> >  797 {
> >  ...
> >  812     /* Claim an entry in tcg_ctxs */
> >  813     n = qatomic_fetch_inc(&tcg_cur_ctxs);
> >  814     g_assert(n < tcg_max_ctxs);
> >
> > Implement and use tcg_unregister_thread() so when a
> > vCPU is unplugged, the tcg_cur_ctxs refcount is
> > decremented.
> 
> 
> I've had addressed this issue before (posted at [1]) and had exercised
> it with vCPU hotplug/unplug patches for ARM although I was not sure about what
> would be needed to be done regarding plugins on the context of
> tcg_unregister_thread. I guess they would need to be freed also?

Doesn't it have the same problem that it will randomly free some context
which is not necessarily associated with the unplugged CPU?

Consider machine with 4 CPUs, they are likely added in order - cpu0
getting context0, cpu1 context1, etc.

Unplug CPU 1. Given that context 3 is top the would be unallocated by
the decrement, or am I missing something?

Thanks

Michal

> 
> Thank you
> 
> Miguel
> 
> [1]: https://lore.kernel.org/qemu-devel/20230926103654.34424-5-salil.mehta@huawei.com/
Alex Bennée Dec. 6, 2023, 3:44 p.m. UTC | #10
Miguel Luis <miguel.luis@oracle.com> writes:

> Hi!
>
> On 04/12/2023 18:40, Philippe Mathieu-Daudé wrote:
>> Unplugging vCPU triggers the following assertion in
>> tcg_register_thread():
>>
>>  796 void tcg_register_thread(void)
>>  797 {
>>  ...
>>  812     /* Claim an entry in tcg_ctxs */
>>  813     n = qatomic_fetch_inc(&tcg_cur_ctxs);
>>  814     g_assert(n < tcg_max_ctxs);
>>
>> Implement and use tcg_unregister_thread() so when a
>> vCPU is unplugged, the tcg_cur_ctxs refcount is
>> decremented.
>
>
> I've had addressed this issue before (posted at [1]) and had exercised
> it with vCPU hotplug/unplug patches for ARM although I was not sure about what
> would be needed to be done regarding plugins on the context of
> tcg_unregister_thread. I guess they would need to be freed also?

Good catch. They should indeed.

>
>
>> Reported-by: Michal Suchánek <msuchanek@suse.de>
>> Suggested-by: Stefan Hajnoczi <stefanha@gmail.com>
>> Signed-off-by: Philippe Mathieu-Daudé <philmd@linaro.org>
>> ---
<snip>
>>  void tcg_register_thread(void)
>>  {
>>      TCGContext *s = g_malloc(sizeof(*s));
>> @@ -814,6 +821,16 @@ void tcg_register_thread(void)
>>  
>>      tcg_ctx = s;
>>  }
>> +
>> +void tcg_unregister_thread(void)
>> +{
>> +    unsigned int n;
>> +
>> +    n = qatomic_fetch_dec(&tcg_cur_ctxs);
>> +    g_free(tcg_ctxs[n]);

Perhaps a bit of re-factoring and we could have a tcg_alloc_context and
tcg_free_context to keep everything together?

>
>
> Is the above off-by-one?
>
>
>> +    qatomic_set(&tcg_ctxs[n], NULL);
>> +}
>> +
>
> Thank you
>
> Miguel
>
> [1]: https://lore.kernel.org/qemu-devel/20230926103654.34424-5-salil.mehta@huawei.com/
>
>
>>  #endif /* !CONFIG_USER_ONLY */
>>  
>>  /* pool based memory allocation */
Miguel Luis Dec. 6, 2023, 3:49 p.m. UTC | #11
> On 6 Dec 2023, at 14:25, Michal Suchánek <msuchanek@suse.de> wrote:
> 
> On Wed, Dec 06, 2023 at 01:17:08PM -0100, Miguel Luis wrote:
>> Hi!
>> 
>> On 04/12/2023 18:40, Philippe Mathieu-Daudé wrote:
>>> Unplugging vCPU triggers the following assertion in
>>> tcg_register_thread():
>>> 
>>> 796 void tcg_register_thread(void)
>>> 797 {
>>> ...
>>> 812     /* Claim an entry in tcg_ctxs */
>>> 813     n = qatomic_fetch_inc(&tcg_cur_ctxs);
>>> 814     g_assert(n < tcg_max_ctxs);
>>> 
>>> Implement and use tcg_unregister_thread() so when a
>>> vCPU is unplugged, the tcg_cur_ctxs refcount is
>>> decremented.
>> 
>> 
>> I've had addressed this issue before (posted at [1]) and had exercised
>> it with vCPU hotplug/unplug patches for ARM although I was not sure about what
>> would be needed to be done regarding plugins on the context of
>> tcg_unregister_thread. I guess they would need to be freed also?
> 
> Doesn't it have the same problem that it will randomly free some context
> which is not necessarily associated with the unplugged CPU?
> 
> Consider machine with 4 CPUs, they are likely added in order - cpu0
> getting context0, cpu1 context1, etc.
> 
> Unplug CPU 1. Given that context 3 is top the would be unallocated by
> the decrement, or am I missing something?
> 

I think you’re right and I share of the same opinion that matching a tcg thread
to a vCPU would be handy to solve this and maybe sorting tcg_ctxs after
unregistering the thread.

Thank you

Miguel

> Thanks
> 
> Michal
> 
>> 
>> Thank you
>> 
>> Miguel
>> 
>> [1]: https://lore.kernel.org/qemu-devel/20230926103654.34424-5-salil.mehta@huawei.com/
Michal Suchánek Dec. 6, 2023, 4:29 p.m. UTC | #12
On Wed, Dec 06, 2023 at 03:49:28PM +0000, Miguel Luis wrote:
> 
> 
> > On 6 Dec 2023, at 14:25, Michal Suchánek <msuchanek@suse.de> wrote:
> > 
> > On Wed, Dec 06, 2023 at 01:17:08PM -0100, Miguel Luis wrote:
> >> Hi!
> >> 
> >> On 04/12/2023 18:40, Philippe Mathieu-Daudé wrote:
> >>> Unplugging vCPU triggers the following assertion in
> >>> tcg_register_thread():
> >>> 
> >>> 796 void tcg_register_thread(void)
> >>> 797 {
> >>> ...
> >>> 812     /* Claim an entry in tcg_ctxs */
> >>> 813     n = qatomic_fetch_inc(&tcg_cur_ctxs);
> >>> 814     g_assert(n < tcg_max_ctxs);
> >>> 
> >>> Implement and use tcg_unregister_thread() so when a
> >>> vCPU is unplugged, the tcg_cur_ctxs refcount is
> >>> decremented.
> >> 
> >> 
> >> I've had addressed this issue before (posted at [1]) and had exercised
> >> it with vCPU hotplug/unplug patches for ARM although I was not sure about what
> >> would be needed to be done regarding plugins on the context of
> >> tcg_unregister_thread. I guess they would need to be freed also?
> > 
> > Doesn't it have the same problem that it will randomly free some context
> > which is not necessarily associated with the unplugged CPU?
> > 
> > Consider machine with 4 CPUs, they are likely added in order - cpu0
> > getting context0, cpu1 context1, etc.
> > 
> > Unplug CPU 1. Given that context 3 is top the would be unallocated by
> > the decrement, or am I missing something?
> > 
> 
> I think you’re right and I share of the same opinion that matching a tcg thread
> to a vCPU would be handy to solve this and maybe sorting tcg_ctxs after
> unregistering the thread.

Tried to apply the patch. It does not crash right away, and due to virsh
limitation I get only one (8-thread) core to hotplug so it did survive a few
hotplug cycles. After a while of hotplugging it crashed, anyway.

Given the atomic_dec there is probably no expectation that the
processing is sequential.

Thanks

Michal
diff mbox series

Patch

diff --git a/include/tcg/startup.h b/include/tcg/startup.h
index f71305765c..520942a4a1 100644
--- a/include/tcg/startup.h
+++ b/include/tcg/startup.h
@@ -45,6 +45,11 @@  void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
  */
 void tcg_register_thread(void);
 
+/**
+ * tcg_unregister_thread: Unregister this thread with the TCG runtime
+ */
+void tcg_unregister_thread(void);
+
 /**
  * tcg_prologue_init(): Generate the code for the TCG prologue
  *
diff --git a/accel/tcg/tcg-accel-ops-mttcg.c b/accel/tcg/tcg-accel-ops-mttcg.c
index fac80095bb..88d7427aad 100644
--- a/accel/tcg/tcg-accel-ops-mttcg.c
+++ b/accel/tcg/tcg-accel-ops-mttcg.c
@@ -120,6 +120,7 @@  static void *mttcg_cpu_thread_fn(void *arg)
 
     tcg_cpus_destroy(cpu);
     qemu_mutex_unlock_iothread();
+    tcg_unregister_thread();
     rcu_remove_force_rcu_notifier(&force_rcu.notifier);
     rcu_unregister_thread();
     return NULL;
diff --git a/accel/tcg/tcg-accel-ops-rr.c b/accel/tcg/tcg-accel-ops-rr.c
index 611932f3c3..c2af3aad21 100644
--- a/accel/tcg/tcg-accel-ops-rr.c
+++ b/accel/tcg/tcg-accel-ops-rr.c
@@ -302,6 +302,7 @@  static void *rr_cpu_thread_fn(void *arg)
         rr_deal_with_unplugged_cpus();
     }
 
+    tcg_unregister_thread();
     rcu_remove_force_rcu_notifier(&force_rcu);
     rcu_unregister_thread();
     return NULL;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index d2ea22b397..5125342d70 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -781,11 +781,18 @@  static void alloc_tcg_plugin_context(TCGContext *s)
  * modes.
  */
 #ifdef CONFIG_USER_ONLY
+
 void tcg_register_thread(void)
 {
     tcg_ctx = &tcg_init_ctx;
 }
+
+void tcg_unregister_thread(void)
+{
+}
+
 #else
+
 void tcg_register_thread(void)
 {
     TCGContext *s = g_malloc(sizeof(*s));
@@ -814,6 +821,16 @@  void tcg_register_thread(void)
 
     tcg_ctx = s;
 }
+
+void tcg_unregister_thread(void)
+{
+    unsigned int n;
+
+    n = qatomic_fetch_dec(&tcg_cur_ctxs);
+    g_free(tcg_ctxs[n]);
+    qatomic_set(&tcg_ctxs[n], NULL);
+}
+
 #endif /* !CONFIG_USER_ONLY */
 
 /* pool based memory allocation */