diff mbox series

[RFC] cpus: split qemu_init_vcpu and delay vCPU thread creation

Message ID 20240529152219.825680-1-alex.bennee@linaro.org
State New
Headers show
Series [RFC] cpus: split qemu_init_vcpu and delay vCPU thread creation | expand

Commit Message

Alex Bennée May 29, 2024, 3:22 p.m. UTC
This ensures we don't start the thread until cpu_common_realizefn has
finished. This ensures that plugins will always run
qemu_plugin_vcpu_init__async first before any other states. It doesn't
totally eliminate the race that plugin_cpu_update__locked has to work
around though. I found this while reviewing the ips plugin which makes
heavy use of the vcpu phase callbacks.

An alternative might be to move the explicit creation of vCPU threads
to qdev_machine_creation_done()? It doesn't affect user-mode which
already has a thread to execute in and ensures the QOM object has
completed creation in cpu_create() before continuing.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
---
 include/hw/core/cpu.h      |  8 ++++++++
 accel/tcg/user-exec-stub.c |  5 +++++
 hw/core/cpu-common.c       |  7 ++++++-
 plugins/core.c             |  5 +++++
 system/cpus.c              | 15 ++++++++++-----
 5 files changed, 34 insertions(+), 6 deletions(-)

Comments

Philippe Mathieu-Daudé May 29, 2024, 3:34 p.m. UTC | #1
Hi Alex,

On 29/5/24 17:22, Alex Bennée wrote:
> This ensures we don't start the thread until cpu_common_realizefn has
> finished. This ensures that plugins will always run
> qemu_plugin_vcpu_init__async first before any other states. It doesn't
> totally eliminate the race that plugin_cpu_update__locked has to work
> around though. I found this while reviewing the ips plugin which makes
> heavy use of the vcpu phase callbacks.
> 
> An alternative might be to move the explicit creation of vCPU threads
> to qdev_machine_creation_done()? It doesn't affect user-mode which
> already has a thread to execute in and ensures the QOM object has
> completed creation in cpu_create() before continuing.
> 
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
>   include/hw/core/cpu.h      |  8 ++++++++
>   accel/tcg/user-exec-stub.c |  5 +++++
>   hw/core/cpu-common.c       |  7 ++++++-
>   plugins/core.c             |  5 +++++
>   system/cpus.c              | 15 ++++++++++-----
>   5 files changed, 34 insertions(+), 6 deletions(-)
> 
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index bb398e8237..6920699585 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -1041,6 +1041,14 @@ void end_exclusive(void);
>    */
>   void qemu_init_vcpu(CPUState *cpu);
>   
> +/**
> + * qemu_start_vcpu:
> + * @cpu: The vCPU to start.
> + *
> + * Create the vCPU thread and start it running.
> + */
> +void qemu_start_vcpu(CPUState *cpu);
> +
>   #define SSTEP_ENABLE  0x1  /* Enable simulated HW single stepping */
>   #define SSTEP_NOIRQ   0x2  /* Do not use IRQ while single stepping */
>   #define SSTEP_NOTIMER 0x4  /* Do not Timers while single stepping */
> diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c
> index 4fbe2dbdc8..162bb72bbe 100644
> --- a/accel/tcg/user-exec-stub.c
> +++ b/accel/tcg/user-exec-stub.c
> @@ -18,6 +18,11 @@ void cpu_exec_reset_hold(CPUState *cpu)
>   {
>   }
>   
> +void qemu_start_vcpu(CPUState *cpu)
> +{
> +    /* NOP for user-mode, we already have a thread */
> +}
> +
>   /* User mode emulation does not support record/replay yet.  */
>   
>   bool replay_exception(void)
> diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
> index 0f0a247f56..68895ddd59 100644
> --- a/hw/core/cpu-common.c
> +++ b/hw/core/cpu-common.c
> @@ -230,7 +230,12 @@ static void cpu_common_realizefn(DeviceState *dev, Error **errp)
>       }
>   #endif
>   
> -    /* NOTE: latest generic point where the cpu is fully realized */
> +    /*
> +     * With everything set up we can finally start the vCPU thread.
> +     * This is a NOP for linux-user.
> +     * NOTE: latest generic point where the cpu is fully realized
> +     */
> +    qemu_start_vcpu(cpu);
>   }
>   
>   static void cpu_common_unrealizefn(DeviceState *dev)
> diff --git a/plugins/core.c b/plugins/core.c
> index 0726bc7f25..1e5da7853b 100644
> --- a/plugins/core.c
> +++ b/plugins/core.c
> @@ -65,6 +65,11 @@ static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
>       CPUState *cpu = container_of(k, CPUState, cpu_index);
>       run_on_cpu_data mask = RUN_ON_CPU_HOST_ULONG(*plugin.mask);
>   
> +    /*
> +     * There is a race condition between the starting of the vCPU
> +     * thread at the end of cpu_common_realizefn and when realized is
> +     * finally set.
> +     */

I'd like we simply assert(DEVICE(cpu)->realized) here;
I still don't understand when this can be called while
the vcpu isn't yet realized.

>       if (DEVICE(cpu)->realized) {
>           async_run_on_cpu(cpu, plugin_cpu_update__async, mask);
>       } else {
> diff --git a/system/cpus.c b/system/cpus.c
> index d3640c9503..7dd8464c5e 100644
> --- a/system/cpus.c
> +++ b/system/cpus.c
> @@ -488,11 +488,13 @@ void cpus_kick_thread(CPUState *cpu)
>   
>   void qemu_cpu_kick(CPUState *cpu)
>   {
> -    qemu_cond_broadcast(cpu->halt_cond);
> -    if (cpus_accel->kick_vcpu_thread) {
> -        cpus_accel->kick_vcpu_thread(cpu);
> -    } else { /* default */
> -        cpus_kick_thread(cpu);
> +    if (cpu->halt_cond) {

cpu->halt_cond = NULL is a bug, why kicking a vcpu not
yet fully created?

> +        qemu_cond_broadcast(cpu->halt_cond);
> +        if (cpus_accel->kick_vcpu_thread) {
> +            cpus_accel->kick_vcpu_thread(cpu);
> +        } else { /* default */
> +            cpus_kick_thread(cpu);
> +        }
>       }
>   }
>   
> @@ -674,7 +676,10 @@ void qemu_init_vcpu(CPUState *cpu)
>           cpu->num_ases = 1;
>           cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
>       }
> +}
>   
> +void qemu_start_vcpu(CPUState *cpu)
> +{
>       /* accelerators all implement the AccelOpsClass */
>       g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
>       cpus_accel->create_vcpu_thread(cpu);
Alex Bennée May 29, 2024, 4:15 p.m. UTC | #2
Philippe Mathieu-Daudé <philmd@linaro.org> writes:

> Hi Alex,
>
> On 29/5/24 17:22, Alex Bennée wrote:
>> This ensures we don't start the thread until cpu_common_realizefn has
>> finished. This ensures that plugins will always run
>> qemu_plugin_vcpu_init__async first before any other states. It doesn't
>> totally eliminate the race that plugin_cpu_update__locked has to work
>> around though. I found this while reviewing the ips plugin which makes
>> heavy use of the vcpu phase callbacks.
>> An alternative might be to move the explicit creation of vCPU
>> threads
>> to qdev_machine_creation_done()? It doesn't affect user-mode which
>> already has a thread to execute in and ensures the QOM object has
>> completed creation in cpu_create() before continuing.
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
>> ---
>>   include/hw/core/cpu.h      |  8 ++++++++
>>   accel/tcg/user-exec-stub.c |  5 +++++
>>   hw/core/cpu-common.c       |  7 ++++++-
>>   plugins/core.c             |  5 +++++
>>   system/cpus.c              | 15 ++++++++++-----
>>   5 files changed, 34 insertions(+), 6 deletions(-)
>> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
>> index bb398e8237..6920699585 100644
>> --- a/include/hw/core/cpu.h
>> +++ b/include/hw/core/cpu.h
>> @@ -1041,6 +1041,14 @@ void end_exclusive(void);
>>    */
>>   void qemu_init_vcpu(CPUState *cpu);
>>   +/**
>> + * qemu_start_vcpu:
>> + * @cpu: The vCPU to start.
>> + *
>> + * Create the vCPU thread and start it running.
>> + */
>> +void qemu_start_vcpu(CPUState *cpu);
>> +
>>   #define SSTEP_ENABLE  0x1  /* Enable simulated HW single stepping */
>>   #define SSTEP_NOIRQ   0x2  /* Do not use IRQ while single stepping */
>>   #define SSTEP_NOTIMER 0x4  /* Do not Timers while single stepping */
>> diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c
>> index 4fbe2dbdc8..162bb72bbe 100644
>> --- a/accel/tcg/user-exec-stub.c
>> +++ b/accel/tcg/user-exec-stub.c
>> @@ -18,6 +18,11 @@ void cpu_exec_reset_hold(CPUState *cpu)
>>   {
>>   }
>>   +void qemu_start_vcpu(CPUState *cpu)
>> +{
>> +    /* NOP for user-mode, we already have a thread */
>> +}
>> +
>>   /* User mode emulation does not support record/replay yet.  */
>>     bool replay_exception(void)
>> diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
>> index 0f0a247f56..68895ddd59 100644
>> --- a/hw/core/cpu-common.c
>> +++ b/hw/core/cpu-common.c
>> @@ -230,7 +230,12 @@ static void cpu_common_realizefn(DeviceState *dev, Error **errp)
>>       }
>>   #endif
>>   -    /* NOTE: latest generic point where the cpu is fully realized
>> */
>> +    /*
>> +     * With everything set up we can finally start the vCPU thread.
>> +     * This is a NOP for linux-user.
>> +     * NOTE: latest generic point where the cpu is fully realized
>> +     */
>> +    qemu_start_vcpu(cpu);
>>   }
>>     static void cpu_common_unrealizefn(DeviceState *dev)
>> diff --git a/plugins/core.c b/plugins/core.c
>> index 0726bc7f25..1e5da7853b 100644
>> --- a/plugins/core.c
>> +++ b/plugins/core.c
>> @@ -65,6 +65,11 @@ static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
>>       CPUState *cpu = container_of(k, CPUState, cpu_index);
>>       run_on_cpu_data mask = RUN_ON_CPU_HOST_ULONG(*plugin.mask);
>>   +    /*
>> +     * There is a race condition between the starting of the vCPU
>> +     * thread at the end of cpu_common_realizefn and when realized is
>> +     * finally set.
>> +     */
>
> I'd like we simply assert(DEVICE(cpu)->realized) here;
> I still don't understand when this can be called while
> the vcpu isn't yet realized.

It will be shortly but as the comment says we don't set it until we come
out of cpu_common_realizefn and return to QOM so you get:

  **
  ERROR:../../plugins/core.c:73:plugin_cpu_update__locked: assertion failed: (DEVICE(cpu)->realized)
  Bail out! ERROR:../../plugins/core.c:73:plugin_cpu_update__locked: assertion failed: (DEVICE(cpu)->realized)

  Thread 4 "qemu-system-aar" received signal SIGABRT, Aborted.
  [Switching to Thread 0x7fffe5e006c0 (LWP 1000969)]
  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
  44      ./nptl/pthread_kill.c: No such file or directory.
  (gdb) bt
  #0  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
  #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
  #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
  #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
  #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
  #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
  #6  0x000055555600e2c8 in plugin_cpu_update__locked (k=0x5555579e9ee8, v=<optimized out>, udata=<optimized out>) at ../../plugins/core.c:73
  #7  0x000055555600e5fc in qemu_plugin_vcpu_init_hook (cpu=0x5555579e9c20) at ../../plugins/core.c:261
  #8  0x00005555558ff106 in process_queued_cpu_work (cpu=0x5555579e9c20) at ../../cpu-common.c:360
  #9  0x00005555560100f6 in mttcg_cpu_thread_fn (arg=arg@entry=0x5555579e9c20) at ../../accel/tcg/tcg-accel-ops-mttcg.c:118
  #10 0x00005555561bcce8 in qemu_thread_start (args=0x555557a55d70) at ../../util/qemu-thread-posix.c:541
  #11 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
  #12 0x00007ffff4d287dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81

Arguably I think we should just wait until machine is created I think.

>
>>       if (DEVICE(cpu)->realized) {
>>           async_run_on_cpu(cpu, plugin_cpu_update__async, mask);
>>       } else {
>> diff --git a/system/cpus.c b/system/cpus.c
>> index d3640c9503..7dd8464c5e 100644
>> --- a/system/cpus.c
>> +++ b/system/cpus.c
>> @@ -488,11 +488,13 @@ void cpus_kick_thread(CPUState *cpu)
>>     void qemu_cpu_kick(CPUState *cpu)
>>   {
>> -    qemu_cond_broadcast(cpu->halt_cond);
>> -    if (cpus_accel->kick_vcpu_thread) {
>> -        cpus_accel->kick_vcpu_thread(cpu);
>> -    } else { /* default */
>> -        cpus_kick_thread(cpu);
>> +    if (cpu->halt_cond) {
>
> cpu->halt_cond = NULL is a bug, why kicking a vcpu not
> yet fully created?

We are queuing work for when it is ready but we don't create
cpu->halt_cond until in the thread (this is mainly to workaround the
fact the rr_thread shares its context between multiple CPUStates).

>
>> +        qemu_cond_broadcast(cpu->halt_cond);
>> +        if (cpus_accel->kick_vcpu_thread) {
>> +            cpus_accel->kick_vcpu_thread(cpu);
>> +        } else { /* default */
>> +            cpus_kick_thread(cpu);
>> +        }
>>       }
>>   }
>>   @@ -674,7 +676,10 @@ void qemu_init_vcpu(CPUState *cpu)
>>           cpu->num_ases = 1;
>>           cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
>>       }
>> +}
>>   +void qemu_start_vcpu(CPUState *cpu)
>> +{
>>       /* accelerators all implement the AccelOpsClass */
>>       g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
>>       cpus_accel->create_vcpu_thread(cpu);
Pierrick Bouvier May 30, 2024, 5:31 p.m. UTC | #3
On 5/29/24 08:22, Alex Bennée wrote:
> This ensures we don't start the thread until cpu_common_realizefn has
> finished. This ensures that plugins will always run
> qemu_plugin_vcpu_init__async first before any other states. It doesn't
> totally eliminate the race that plugin_cpu_update__locked has to work
> around though. I found this while reviewing the ips plugin which makes
> heavy use of the vcpu phase callbacks.
> 
> An alternative might be to move the explicit creation of vCPU threads
> to qdev_machine_creation_done()? It doesn't affect user-mode which
> already has a thread to execute in and ensures the QOM object has
> completed creation in cpu_create() before continuing.
> 
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
> ---
>   include/hw/core/cpu.h      |  8 ++++++++
>   accel/tcg/user-exec-stub.c |  5 +++++
>   hw/core/cpu-common.c       |  7 ++++++-
>   plugins/core.c             |  5 +++++
>   system/cpus.c              | 15 ++++++++++-----
>   5 files changed, 34 insertions(+), 6 deletions(-)
> 
> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
> index bb398e8237..6920699585 100644
> --- a/include/hw/core/cpu.h
> +++ b/include/hw/core/cpu.h
> @@ -1041,6 +1041,14 @@ void end_exclusive(void);
>    */
>   void qemu_init_vcpu(CPUState *cpu);
>   
> +/**
> + * qemu_start_vcpu:
> + * @cpu: The vCPU to start.
> + *
> + * Create the vCPU thread and start it running.
> + */
> +void qemu_start_vcpu(CPUState *cpu);
> +
>   #define SSTEP_ENABLE  0x1  /* Enable simulated HW single stepping */
>   #define SSTEP_NOIRQ   0x2  /* Do not use IRQ while single stepping */
>   #define SSTEP_NOTIMER 0x4  /* Do not Timers while single stepping */
> diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c
> index 4fbe2dbdc8..162bb72bbe 100644
> --- a/accel/tcg/user-exec-stub.c
> +++ b/accel/tcg/user-exec-stub.c
> @@ -18,6 +18,11 @@ void cpu_exec_reset_hold(CPUState *cpu)
>   {
>   }
>   
> +void qemu_start_vcpu(CPUState *cpu)
> +{
> +    /* NOP for user-mode, we already have a thread */
> +}
> +
>   /* User mode emulation does not support record/replay yet.  */
>   
>   bool replay_exception(void)
> diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
> index 0f0a247f56..68895ddd59 100644
> --- a/hw/core/cpu-common.c
> +++ b/hw/core/cpu-common.c
> @@ -230,7 +230,12 @@ static void cpu_common_realizefn(DeviceState *dev, Error **errp)
>       }
>   #endif
>   
> -    /* NOTE: latest generic point where the cpu is fully realized */
> +    /*
> +     * With everything set up we can finally start the vCPU thread.
> +     * This is a NOP for linux-user.
> +     * NOTE: latest generic point where the cpu is fully realized
> +     */
> +    qemu_start_vcpu(cpu);
>   }
>   
>   static void cpu_common_unrealizefn(DeviceState *dev)
> diff --git a/plugins/core.c b/plugins/core.c
> index 0726bc7f25..1e5da7853b 100644
> --- a/plugins/core.c
> +++ b/plugins/core.c
> @@ -65,6 +65,11 @@ static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
>       CPUState *cpu = container_of(k, CPUState, cpu_index);
>       run_on_cpu_data mask = RUN_ON_CPU_HOST_ULONG(*plugin.mask);
>   
> +    /*
> +     * There is a race condition between the starting of the vCPU
> +     * thread at the end of cpu_common_realizefn and when realized is
> +     * finally set.
> +     */

Could we simply have an active wait here?
while (!DEVICE(cpu)->realized) {}

We have a guarantee it will be realized shortly, and if it's too hard to 
have a proper synchronization mechanism (introduce a realize_cond?), 
then waiting for the proper state does not seem too bad.

It's a bit strange for me to document an existing race condition, 
instead of finding a solution.

>       if (DEVICE(cpu)->realized) {
>           async_run_on_cpu(cpu, plugin_cpu_update__async, mask);
>       } else {
> diff --git a/system/cpus.c b/system/cpus.c
> index d3640c9503..7dd8464c5e 100644
> --- a/system/cpus.c
> +++ b/system/cpus.c
> @@ -488,11 +488,13 @@ void cpus_kick_thread(CPUState *cpu)
>   
>   void qemu_cpu_kick(CPUState *cpu)
>   {
> -    qemu_cond_broadcast(cpu->halt_cond);
> -    if (cpus_accel->kick_vcpu_thread) {
> -        cpus_accel->kick_vcpu_thread(cpu);
> -    } else { /* default */
> -        cpus_kick_thread(cpu);
> +    if (cpu->halt_cond) {
> +        qemu_cond_broadcast(cpu->halt_cond);
> +        if (cpus_accel->kick_vcpu_thread) {
> +            cpus_accel->kick_vcpu_thread(cpu);
> +        } else { /* default */
> +            cpus_kick_thread(cpu);
> +        }
>       }
>   }
>   
> @@ -674,7 +676,10 @@ void qemu_init_vcpu(CPUState *cpu)
>           cpu->num_ases = 1;
>           cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
>       }
> +}
>   
> +void qemu_start_vcpu(CPUState *cpu)
> +{
>       /* accelerators all implement the AccelOpsClass */
>       g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
>       cpus_accel->create_vcpu_thread(cpu);
Alex Bennée May 30, 2024, 7:21 p.m. UTC | #4
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

> On 5/29/24 08:22, Alex Bennée wrote:
>> This ensures we don't start the thread until cpu_common_realizefn has
>> finished. This ensures that plugins will always run
>> qemu_plugin_vcpu_init__async first before any other states. It doesn't
>> totally eliminate the race that plugin_cpu_update__locked has to work
>> around though. I found this while reviewing the ips plugin which makes
>> heavy use of the vcpu phase callbacks.
>> An alternative might be to move the explicit creation of vCPU
>> threads
>> to qdev_machine_creation_done()? It doesn't affect user-mode which
>> already has a thread to execute in and ensures the QOM object has
>> completed creation in cpu_create() before continuing.
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> Cc: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
>> ---
>>   include/hw/core/cpu.h      |  8 ++++++++
>>   accel/tcg/user-exec-stub.c |  5 +++++
>>   hw/core/cpu-common.c       |  7 ++++++-
>>   plugins/core.c             |  5 +++++
>>   system/cpus.c              | 15 ++++++++++-----
>>   5 files changed, 34 insertions(+), 6 deletions(-)
>> diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
>> index bb398e8237..6920699585 100644
>> --- a/include/hw/core/cpu.h
>> +++ b/include/hw/core/cpu.h
>> @@ -1041,6 +1041,14 @@ void end_exclusive(void);
>>    */
>>   void qemu_init_vcpu(CPUState *cpu);
>>   +/**
>> + * qemu_start_vcpu:
>> + * @cpu: The vCPU to start.
>> + *
>> + * Create the vCPU thread and start it running.
>> + */
>> +void qemu_start_vcpu(CPUState *cpu);
>> +
>>   #define SSTEP_ENABLE  0x1  /* Enable simulated HW single stepping */
>>   #define SSTEP_NOIRQ   0x2  /* Do not use IRQ while single stepping */
>>   #define SSTEP_NOTIMER 0x4  /* Do not Timers while single stepping */
>> diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c
>> index 4fbe2dbdc8..162bb72bbe 100644
>> --- a/accel/tcg/user-exec-stub.c
>> +++ b/accel/tcg/user-exec-stub.c
>> @@ -18,6 +18,11 @@ void cpu_exec_reset_hold(CPUState *cpu)
>>   {
>>   }
>>   +void qemu_start_vcpu(CPUState *cpu)
>> +{
>> +    /* NOP for user-mode, we already have a thread */
>> +}
>> +
>>   /* User mode emulation does not support record/replay yet.  */
>>     bool replay_exception(void)
>> diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
>> index 0f0a247f56..68895ddd59 100644
>> --- a/hw/core/cpu-common.c
>> +++ b/hw/core/cpu-common.c
>> @@ -230,7 +230,12 @@ static void cpu_common_realizefn(DeviceState *dev, Error **errp)
>>       }
>>   #endif
>>   -    /* NOTE: latest generic point where the cpu is fully realized
>> */
>> +    /*
>> +     * With everything set up we can finally start the vCPU thread.
>> +     * This is a NOP for linux-user.
>> +     * NOTE: latest generic point where the cpu is fully realized
>> +     */
>> +    qemu_start_vcpu(cpu);
>>   }
>>     static void cpu_common_unrealizefn(DeviceState *dev)
>> diff --git a/plugins/core.c b/plugins/core.c
>> index 0726bc7f25..1e5da7853b 100644
>> --- a/plugins/core.c
>> +++ b/plugins/core.c
>> @@ -65,6 +65,11 @@ static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
>>       CPUState *cpu = container_of(k, CPUState, cpu_index);
>>       run_on_cpu_data mask = RUN_ON_CPU_HOST_ULONG(*plugin.mask);
>>   +    /*
>> +     * There is a race condition between the starting of the vCPU
>> +     * thread at the end of cpu_common_realizefn and when realized is
>> +     * finally set.
>> +     */
>
> Could we simply have an active wait here?
> while (!DEVICE(cpu)->realized) {}
>
> We have a guarantee it will be realized shortly, and if it's too hard
> to have a proper synchronization mechanism (introduce a
> realize_cond?), then waiting for the proper state does not seem too
> bad.
>
> It's a bit strange for me to document an existing race condition,
> instead of finding a solution.

If only it were that simple ;-)

Having been digging into this today it looks like there is a careful set
of dependencies on when threads need to be created during CPU
realization. I did try pushing the thread creation out of realization
but that breaks things like KVM which can't initialise some things
until the thread is up.

I'm now trying to move the plugin queuing its async work to after things
are initialised and before threads are created. My only concern now is
if I need to avoid kicking threads before they are created.
diff mbox series

Patch

diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index bb398e8237..6920699585 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -1041,6 +1041,14 @@  void end_exclusive(void);
  */
 void qemu_init_vcpu(CPUState *cpu);
 
+/**
+ * qemu_start_vcpu:
+ * @cpu: The vCPU to start.
+ *
+ * Create the vCPU thread and start it running.
+ */
+void qemu_start_vcpu(CPUState *cpu);
+
 #define SSTEP_ENABLE  0x1  /* Enable simulated HW single stepping */
 #define SSTEP_NOIRQ   0x2  /* Do not use IRQ while single stepping */
 #define SSTEP_NOTIMER 0x4  /* Do not Timers while single stepping */
diff --git a/accel/tcg/user-exec-stub.c b/accel/tcg/user-exec-stub.c
index 4fbe2dbdc8..162bb72bbe 100644
--- a/accel/tcg/user-exec-stub.c
+++ b/accel/tcg/user-exec-stub.c
@@ -18,6 +18,11 @@  void cpu_exec_reset_hold(CPUState *cpu)
 {
 }
 
+void qemu_start_vcpu(CPUState *cpu)
+{
+    /* NOP for user-mode, we already have a thread */
+}
+
 /* User mode emulation does not support record/replay yet.  */
 
 bool replay_exception(void)
diff --git a/hw/core/cpu-common.c b/hw/core/cpu-common.c
index 0f0a247f56..68895ddd59 100644
--- a/hw/core/cpu-common.c
+++ b/hw/core/cpu-common.c
@@ -230,7 +230,12 @@  static void cpu_common_realizefn(DeviceState *dev, Error **errp)
     }
 #endif
 
-    /* NOTE: latest generic point where the cpu is fully realized */
+    /*
+     * With everything set up we can finally start the vCPU thread.
+     * This is a NOP for linux-user.
+     * NOTE: latest generic point where the cpu is fully realized
+     */
+    qemu_start_vcpu(cpu);
 }
 
 static void cpu_common_unrealizefn(DeviceState *dev)
diff --git a/plugins/core.c b/plugins/core.c
index 0726bc7f25..1e5da7853b 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -65,6 +65,11 @@  static void plugin_cpu_update__locked(gpointer k, gpointer v, gpointer udata)
     CPUState *cpu = container_of(k, CPUState, cpu_index);
     run_on_cpu_data mask = RUN_ON_CPU_HOST_ULONG(*plugin.mask);
 
+    /*
+     * There is a race condition between the starting of the vCPU
+     * thread at the end of cpu_common_realizefn and when realized is
+     * finally set.
+     */
     if (DEVICE(cpu)->realized) {
         async_run_on_cpu(cpu, plugin_cpu_update__async, mask);
     } else {
diff --git a/system/cpus.c b/system/cpus.c
index d3640c9503..7dd8464c5e 100644
--- a/system/cpus.c
+++ b/system/cpus.c
@@ -488,11 +488,13 @@  void cpus_kick_thread(CPUState *cpu)
 
 void qemu_cpu_kick(CPUState *cpu)
 {
-    qemu_cond_broadcast(cpu->halt_cond);
-    if (cpus_accel->kick_vcpu_thread) {
-        cpus_accel->kick_vcpu_thread(cpu);
-    } else { /* default */
-        cpus_kick_thread(cpu);
+    if (cpu->halt_cond) {
+        qemu_cond_broadcast(cpu->halt_cond);
+        if (cpus_accel->kick_vcpu_thread) {
+            cpus_accel->kick_vcpu_thread(cpu);
+        } else { /* default */
+            cpus_kick_thread(cpu);
+        }
     }
 }
 
@@ -674,7 +676,10 @@  void qemu_init_vcpu(CPUState *cpu)
         cpu->num_ases = 1;
         cpu_address_space_init(cpu, 0, "cpu-memory", cpu->memory);
     }
+}
 
+void qemu_start_vcpu(CPUState *cpu)
+{
     /* accelerators all implement the AccelOpsClass */
     g_assert(cpus_accel != NULL && cpus_accel->create_vcpu_thread != NULL);
     cpus_accel->create_vcpu_thread(cpu);