
[5/5] contrib/plugins: add ips plugin example for cost modeling

Message ID 20240516222047.1853459-6-pierrick.bouvier@linaro.org
State Superseded
Series Implement icount=auto using TCG Plugins

Commit Message

Pierrick Bouvier May 16, 2024, 10:20 p.m. UTC
This plugin uses the new time control interface to make decisions
about the state of time during the emulation. The algorithm is
currently very simple. The user specifies an ips rate which applies
per core. If the core runs ahead of its allocated execution time the
plugin sleeps for a bit to let real time catch up. Either way time is
updated for the emulation as a function of total executed instructions
with some adjustments for cores that idle.
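
A rough sketch of the bookkeeping, matching the helpers in the code below:

  virtual_time_ns = counter / ips * 1e9    (time_for_insn)
  insn_quantum    = ips / 10               (time is re-evaluated every quantum)

e.g. with ips=250000000, a vCPU that has executed 500 million instructions
sits at 2 seconds of virtual time.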

Examples
--------

Slow down execution of /bin/true:
$ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
$ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
real 4.000s

Boot a Linux kernel simulating a 250MHz cpu:
$ ./build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
Check the time it takes until the kernel panic appears on the serial console.
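
The plugin also takes a precise option to count (and trap) on every
instruction instead of once per translation block; an equivalent, untested
invocation for the /bin/true example would be:
$ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)),precise=on /bin/true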

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
 contrib/plugins/ips.c    | 239 +++++++++++++++++++++++++++++++++++++++
 contrib/plugins/Makefile |   1 +
 2 files changed, 240 insertions(+)
 create mode 100644 contrib/plugins/ips.c

Comments

Alex Bennée May 28, 2024, 7:14 p.m. UTC | #1
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

> This plugin uses the new time control interface to make decisions
> about the state of time during the emulation. The algorithm is
> currently very simple. The user specifies an ips rate which applies
> per core. If the core runs ahead of its allocated execution time the
> plugin sleeps for a bit to let real time catch up. Either way time is
> updated for the emulation as a function of total executed instructions
> with some adjustments for cores that idle.
>
> Examples
> --------
>
> Slow down execution of /bin/true:
> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
> real 4.000s
>
> Boot a Linux kernel simulating a 250MHz cpu:
> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
> check time until kernel panic on serial0
>
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
>  contrib/plugins/ips.c    | 239 +++++++++++++++++++++++++++++++++++++++
>  contrib/plugins/Makefile |   1 +
>  2 files changed, 240 insertions(+)
>  create mode 100644 contrib/plugins/ips.c
>
> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
> new file mode 100644
> index 00000000000..cf3159df391
> --- /dev/null
> +++ b/contrib/plugins/ips.c
> @@ -0,0 +1,239 @@
> +/*
> + * ips rate limiting plugin.
> + *
> + * This plugin can be used to restrict the execution of a system to a
> + * particular number of Instructions Per Second (ips). This controls
> + * time as seen by the guest, so while wall-clock time may be longer,
> + * from the guest's point of view time will pass at the normal rate.
> + *
> + * This uses the new plugin API which allows the plugin to control
> + * system time.
> + *
> + * Copyright (c) 2023 Linaro Ltd
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <stdio.h>
> +#include <glib.h>
> +#include <qemu-plugin.h>
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
> +
> +/* how many times do we update time per sec */
> +#define NUM_TIME_UPDATE_PER_SEC 10
> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
> +
> +static GMutex global_state_lock;
> +
> +static uint64_t insn_per_second = 1000 * 1000; /* ips per core, per second */
> +static uint64_t insn_quantum; /* trap every N instructions */
> +static bool precise_execution; /* count every instruction */
> +static int64_t start_time_ns; /* time (ns since epoch) first vCPU started */
> +static int64_t virtual_time_ns; /* last set virtual time */
> +
> +static const void *time_handle;
> +
> +typedef enum {
> +    UNKNOWN = 0,
> +    EXECUTING,
> +    IDLE,
> +    FINISHED
> +} vCPUState;
> +
> +typedef struct {
> +    uint64_t counter;
> +    uint64_t track_insn;
> +    vCPUState state;
> +    /* timestamp when vCPU entered state */
> +    int64_t last_state_time;
> +} vCPUTime;
> +
> +struct qemu_plugin_scoreboard *vcpus;
> +
> +/* return epoch time in ns */
> +static int64_t now_ns(void)
> +{
> +    return g_get_real_time() * 1000;
> +}
> +
> +static uint64_t num_insn_during(int64_t elapsed_ns)
> +{
> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
> +    return num_secs * (double) insn_per_second;
> +}
> +
> +static int64_t time_for_insn(uint64_t num_insn)
> +{
> +    double num_secs = (double) num_insn / (double) insn_per_second;
> +    return num_secs * (double) NSEC_IN_ONE_SEC;
> +}
> +
> +static int64_t uptime_ns(void)
> +{
> +    int64_t now = now_ns();
> +    g_assert(now >= start_time_ns);
> +    return now - start_time_ns;
> +}
> +
> +static void vcpu_set_state(vCPUTime *vcpu, vCPUState new_state)
> +{
> +    vcpu->last_state_time = now_ns();
> +    vcpu->state = new_state;
> +}
> +
> +static void update_system_time(vCPUTime *vcpu)
> +{
> +    /* flush remaining instructions */
> +    vcpu->counter += vcpu->track_insn;
> +    vcpu->track_insn = 0;
> +
> +    int64_t uptime = uptime_ns();
> +    uint64_t expected_insn = num_insn_during(uptime);
> +
> +    if (vcpu->counter >= expected_insn) {
> +        /* this vcpu ran faster than expected, so it has to sleep */
> +        uint64_t insn_advance = vcpu->counter - expected_insn;
> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
> +        int64_t sleep_us = time_advance_ns / 1000;
> +        g_usleep(sleep_us);
> +    }
> +
> +    /* based on number of instructions, what should be the new time? */
> +    int64_t new_virtual_time = time_for_insn(vcpu->counter);
> +
> +    g_mutex_lock(&global_state_lock);
> +
> +    /* Time only moves forward. Another vcpu might have updated it already. */
> +    if (new_virtual_time > virtual_time_ns) {
> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
> +        virtual_time_ns = new_virtual_time;
> +    }
> +
> +    g_mutex_unlock(&global_state_lock);
> +}
> +
> +static void set_start_time()
> +{
> +    g_mutex_lock(&global_state_lock);
> +    if (!start_time_ns) {
> +        start_time_ns = now_ns();
> +    }
> +    g_mutex_unlock(&global_state_lock);
> +}
> +
> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
> +{
> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> +    /* ensure start time is set first */
> +    set_start_time();
> +    /* start counter from absolute time reference */
> +    vcpu->counter = num_insn_during(uptime_ns());
> +    vcpu_set_state(vcpu, EXECUTING);
> +}
> +
> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
> +{
> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> +    vcpu_set_state(vcpu, IDLE);
> +}
> +
> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
> +{
> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> +    g_assert(vcpu->state == IDLE);

I'm triggering a weird race here:

  (gdb) b vcpu_init
  Breakpoint 1 at 0x7ffff7fa15f7: file /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127.
  (gdb) r
  The program being debugged has been started already.
  Start it from the beginning? (y or n) y
  Starting program: /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64 -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars -cpu cortex-a57 -smp 32 -accel tcg -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -device scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22 -blockdev driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap -serial mon:stdio -blockdev node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true -blockdev node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append root=/dev/sda2\ console=ttyAMA0 -plugin contrib/plugins/libips.so,ips=1000000000
  [Thread debugging using libthread_db enabled]
  Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
  [New Thread 0x7fffe72006c0 (LWP 360538)]
  [New Thread 0x7fffe68006c0 (LWP 360540)]
  [New Thread 0x7fffe5e006c0 (LWP 360541)]
  [New Thread 0x7fffe54006c0 (LWP 360542)]
  [New Thread 0x7fffe4a006c0 (LWP 360543)]
  [New Thread 0x7fffdfe006c0 (LWP 360544)]
  [New Thread 0x7fffdf4006c0 (LWP 360545)]
  [New Thread 0x7fffdea006c0 (LWP 360546)]
  [Switching to Thread 0x7fffdf4006c0 (LWP 360545)]

  Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=0) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
  127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
  (gdb) c
  Continuing.
  [New Thread 0x7fffde0006c0 (LWP 360548)]
  [Switching to Thread 0x7fffdea006c0 (LWP 360546)]

  Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=1) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
  127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
  (gdb)
  Continuing.
  [New Thread 0x7fffdd6006c0 (LWP 360549)]
  [Switching to Thread 0x7fffde0006c0 (LWP 360548)]

  Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=2) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
  127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
  (gdb)
  Continuing.
  [New Thread 0x7fffdcc006c0 (LWP 360550)]
  [Switching to Thread 0x7fffdd6006c0 (LWP 360549)]

  Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=3) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
  127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
  (gdb)
  Continuing.
  [New Thread 0x7fffd3e006c0 (LWP 360551)]
  [Switching to Thread 0x7fffdcc006c0 (LWP 360550)]

  Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=4) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
  127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
  (gdb) n
  129         set_start_time();
  (gdb)
  131         vcpu->counter = num_insn_during(uptime_ns());
  (gdb)
  132         vcpu_set_state(vcpu, EXECUTING);
  (gdb)
  133     }
  (gdb) p vcpu->state
  $1 = EXECUTING
  (gdb) p &vcpu->state
  $2 = (vCPUState *) 0x555557c6b5d0
  (gdb) watch *(vCPUState *) 0x555557c6b5d0
  Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
  (gdb) c
  Continuing.
  [Thread 0x7fffdfe006c0 (LWP 360544) exited]
  [Thread 0x7fffe5e006c0 (LWP 360541) exited]

  Thread 12 "qemu-system-aar" hit Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0

  Old value = EXECUTING
  New value = IDLE
  vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83
  83      }
  (gdb) c
  Continuing.
  [New Thread 0x7fffdfe006c0 (LWP 360591)]
  [New Thread 0x7fffe5e006c0 (LWP 360592)]
  [New Thread 0x7fffd34006c0 (LWP 360593)]
  [Switching to Thread 0x7fffdfe006c0 (LWP 360591)]

  Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=6) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
  127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
  (gdb)
  Continuing.
  [New Thread 0x7fffd2a006c0 (LWP 360594)]
  **
  ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
  Bail out! ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)

  Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted.
  [Switching to Thread 0x7fffd3e006c0 (LWP 360551)]
  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
  44      ./nptl/pthread_kill.c: No such file or directory.
  (gdb) bt
  #0  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
  #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
  #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
  #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
  #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
  #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
  #6  0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144
  #7  0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111
  #8  0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) at ../../plugins/core.c:523
  #9  0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at ../../system/cpus.c:465
  #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at ../../accel/tcg/tcg-accel-ops-mttcg.c:118
  #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at ../../util/qemu-thread-posix.c:541
  #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
  #13 0x00007ffff4d287dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
  (gdb)

But I don't understand how we can ever hit the idle callback without
first hitting the init callback.

> +    int64_t idle_time = now_ns() - vcpu->last_state_time;
> +    /* accumulate expected number of instructions */
> +    vcpu->counter += num_insn_during(idle_time);
> +    vcpu_set_state(vcpu, EXECUTING);
> +}
> +
> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
> +{
> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> +    vcpu_set_state(vcpu, FINISHED);
> +    update_system_time(vcpu);
> +    vcpu->counter = 0;
> +}
> +
> +static void every_insn_quantum(unsigned int cpu_index, void *udata)
> +{
> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
> +    g_assert(vcpu->track_insn >= insn_quantum);
> +    update_system_time(vcpu);
> +}
> +
> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
> +{
> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
> +    qemu_plugin_u64 track_insn =
> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, track_insn);
> +    if (precise_execution) {
> +        /* count (and eventually trap) on every instruction */
> +        for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) {
> +            struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx);
> +            qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
> +                insn, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, 1);
> +            qemu_plugin_register_vcpu_insn_exec_cond_cb(
> +                insn, every_insn_quantum,
> +                QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
> +                track_insn, insn_quantum, NULL);
> +        }
> +    } else {
> +        /* count (and eventually trap) once per tb */
> +        qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
> +            tb, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, n_insns);
> +        qemu_plugin_register_vcpu_tb_exec_cond_cb(
> +            tb, every_insn_quantum,
> +            QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
> +            track_insn, insn_quantum, NULL);
> +    }
> +}
> +
> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
> +{
> +    qemu_plugin_scoreboard_free(vcpus);
> +}
> +
> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
> +                                           const qemu_info_t *info, int argc,
> +                                           char **argv)
> +{
> +    for (int i = 0; i < argc; i++) {
> +        char *opt = argv[i];
> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
> +        if (g_strcmp0(tokens[0], "ips") == 0) {
> +            insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
> +            if (!insn_per_second && errno) {
> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
> +                        __func__, tokens[1], g_strerror(errno));
> +                return -1;
> +            }
> +
> +        } else if (g_strcmp0(tokens[0], "precise") == 0) {
> +            if (!qemu_plugin_bool_parse(tokens[0], tokens[1],
> +                                        &precise_execution)) {
> +                fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
> +                return -1;
> +            }
> +        } else {
> +            fprintf(stderr, "option parsing failed: %s\n", opt);
> +            return -1;
> +        }
> +    }
> +
> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
> +    insn_quantum = insn_per_second / NUM_TIME_UPDATE_PER_SEC;
> +
> +    time_handle = qemu_plugin_request_time_control();
> +    g_assert(time_handle);
> +
> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
> +    qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
> +    qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume);
> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
> +
> +    return 0;
> +}
> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
> index 0b64d2c1e3a..449ead11305 100644
> --- a/contrib/plugins/Makefile
> +++ b/contrib/plugins/Makefile
> @@ -27,6 +27,7 @@ endif
>  NAMES += hwprofile
>  NAMES += cache
>  NAMES += drcov
> +NAMES += ips
>  
>  ifeq ($(CONFIG_WIN32),y)
>  SO_SUFFIX := .dll
Pierrick Bouvier May 28, 2024, 7:26 p.m. UTC | #2
On 5/28/24 12:14, Alex Bennée wrote:
> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
> 
>> This plugin uses the new time control interface to make decisions
>> about the state of time during the emulation. The algorithm is
>> currently very simple. The user specifies an ips rate which applies
>> per core. If the core runs ahead of its allocated execution time the
>> plugin sleeps for a bit to let real time catch up. Either way time is
>> updated for the emulation as a function of total executed instructions
>> with some adjustments for cores that idle.
>>
>> Examples
>> --------
>>
>> Slow down execution of /bin/true:
>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>> real 4.000s
>>
>> Boot a Linux kernel simulating a 250MHz cpu:
>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>> check time until kernel panic on serial0
>>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> ---
>>   contrib/plugins/ips.c    | 239 +++++++++++++++++++++++++++++++++++++++
>>   contrib/plugins/Makefile |   1 +
>>   2 files changed, 240 insertions(+)
>>   create mode 100644 contrib/plugins/ips.c
>>
>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>> new file mode 100644
>> index 00000000000..cf3159df391
>> --- /dev/null
>> +++ b/contrib/plugins/ips.c
>> @@ -0,0 +1,239 @@
>> +/*
>> + * ips rate limiting plugin.
>> + *
>> + * This plugin can be used to restrict the execution of a system to a
>> + * particular number of Instructions Per Second (ips). This controls
>> + * time as seen by the guest so while wall-clock time may be longer
>> + * from the guests point of view time will pass at the normal rate.
>> + *
>> + * This uses the new plugin API which allows the plugin to control
>> + * system time.
>> + *
>> + * Copyright (c) 2023 Linaro Ltd
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include <stdio.h>
>> +#include <glib.h>
>> +#include <qemu-plugin.h>
>> +
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>> +
>> +/* how many times do we update time per sec */
>> +#define NUM_TIME_UPDATE_PER_SEC 10
>> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
>> +
>> +static GMutex global_state_lock;
>> +
>> +static uint64_t insn_per_second = 1000 * 1000; /* ips per core, per second */
>> +static uint64_t insn_quantum; /* trap every N instructions */
>> +static bool precise_execution; /* count every instruction */
>> +static int64_t start_time_ns; /* time (ns since epoch) first vCPU started */
>> +static int64_t virtual_time_ns; /* last set virtual time */
>> +
>> +static const void *time_handle;
>> +
>> +typedef enum {
>> +    UNKNOWN = 0,
>> +    EXECUTING,
>> +    IDLE,
>> +    FINISHED
>> +} vCPUState;
>> +
>> +typedef struct {
>> +    uint64_t counter;
>> +    uint64_t track_insn;
>> +    vCPUState state;
>> +    /* timestamp when vCPU entered state */
>> +    int64_t last_state_time;
>> +} vCPUTime;
>> +
>> +struct qemu_plugin_scoreboard *vcpus;
>> +
>> +/* return epoch time in ns */
>> +static int64_t now_ns(void)
>> +{
>> +    return g_get_real_time() * 1000;
>> +}
>> +
>> +static uint64_t num_insn_during(int64_t elapsed_ns)
>> +{
>> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
>> +    return num_secs * (double) insn_per_second;
>> +}
>> +
>> +static int64_t time_for_insn(uint64_t num_insn)
>> +{
>> +    double num_secs = (double) num_insn / (double) insn_per_second;
>> +    return num_secs * (double) NSEC_IN_ONE_SEC;
>> +}
>> +
>> +static int64_t uptime_ns(void)
>> +{
>> +    int64_t now = now_ns();
>> +    g_assert(now >= start_time_ns);
>> +    return now - start_time_ns;
>> +}
>> +
>> +static void vcpu_set_state(vCPUTime *vcpu, vCPUState new_state)
>> +{
>> +    vcpu->last_state_time = now_ns();
>> +    vcpu->state = new_state;
>> +}
>> +
>> +static void update_system_time(vCPUTime *vcpu)
>> +{
>> +    /* flush remaining instructions */
>> +    vcpu->counter += vcpu->track_insn;
>> +    vcpu->track_insn = 0;
>> +
>> +    int64_t uptime = uptime_ns();
>> +    uint64_t expected_insn = num_insn_during(uptime);
>> +
>> +    if (vcpu->counter >= expected_insn) {
>> +        /* this vcpu ran faster than expected, so it has to sleep */
>> +        uint64_t insn_advance = vcpu->counter - expected_insn;
>> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
>> +        int64_t sleep_us = time_advance_ns / 1000;
>> +        g_usleep(sleep_us);
>> +    }
>> +
>> +    /* based on number of instructions, what should be the new time? */
>> +    int64_t new_virtual_time = time_for_insn(vcpu->counter);
>> +
>> +    g_mutex_lock(&global_state_lock);
>> +
>> +    /* Time only moves forward. Another vcpu might have updated it already. */
>> +    if (new_virtual_time > virtual_time_ns) {
>> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
>> +        virtual_time_ns = new_virtual_time;
>> +    }
>> +
>> +    g_mutex_unlock(&global_state_lock);
>> +}
>> +
>> +static void set_start_time()
>> +{
>> +    g_mutex_lock(&global_state_lock);
>> +    if (!start_time_ns) {
>> +        start_time_ns = now_ns();
>> +    }
>> +    g_mutex_unlock(&global_state_lock);
>> +}
>> +
>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>> +{
>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>> +    /* ensure start time is set first */
>> +    set_start_time();
>> +    /* start counter from absolute time reference */
>> +    vcpu->counter = num_insn_during(uptime_ns());
>> +    vcpu_set_state(vcpu, EXECUTING);
>> +}
>> +
>> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
>> +{
>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>> +    vcpu_set_state(vcpu, IDLE);
>> +}
>> +
>> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
>> +{
>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>> +    g_assert(vcpu->state == IDLE);
> 
> I'm triggering a weird race here:
> 
>    (gdb) b vcpu_init
>    Breakpoint 1 at 0x7ffff7fa15f7: file /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127.
>    (gdb) r
>    The program being debugged has been started already.
>    Start it from the beginning? (y or n) y
>    Starting program: /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64 -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars -cpu cortex-a57 -smp 32 -accel tcg -device virtio-net-pci,netdev=unet -device virtio-scsi-pci -device scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22 -blockdev driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap -serial mon:stdio -blockdev node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true -blockdev node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append root=/dev/sda2\ console=ttyAMA0 -plugin contrib/plugins/libips.so,ips=1000000000
>    [Thread debugging using libthread_db enabled]
>    Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>    [New Thread 0x7fffe72006c0 (LWP 360538)]
>    [New Thread 0x7fffe68006c0 (LWP 360540)]
>    [New Thread 0x7fffe5e006c0 (LWP 360541)]
>    [New Thread 0x7fffe54006c0 (LWP 360542)]
>    [New Thread 0x7fffe4a006c0 (LWP 360543)]
>    [New Thread 0x7fffdfe006c0 (LWP 360544)]
>    [New Thread 0x7fffdf4006c0 (LWP 360545)]
>    [New Thread 0x7fffdea006c0 (LWP 360546)]
>    [Switching to Thread 0x7fffdf4006c0 (LWP 360545)]
> 
>    Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=0) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>    (gdb) c
>    Continuing.
>    [New Thread 0x7fffde0006c0 (LWP 360548)]
>    [Switching to Thread 0x7fffdea006c0 (LWP 360546)]
> 
>    Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=1) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>    (gdb)
>    Continuing.
>    [New Thread 0x7fffdd6006c0 (LWP 360549)]
>    [Switching to Thread 0x7fffde0006c0 (LWP 360548)]
> 
>    Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=2) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>    (gdb)
>    Continuing.
>    [New Thread 0x7fffdcc006c0 (LWP 360550)]
>    [Switching to Thread 0x7fffdd6006c0 (LWP 360549)]
> 
>    Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=3) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>    (gdb)
>    Continuing.
>    [New Thread 0x7fffd3e006c0 (LWP 360551)]
>    [Switching to Thread 0x7fffdcc006c0 (LWP 360550)]
> 
>    Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=4) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>    (gdb) n
>    129         set_start_time();
>    (gdb)
>    131         vcpu->counter = num_insn_during(uptime_ns());
>    (gdb)
>    132         vcpu_set_state(vcpu, EXECUTING);
>    (gdb)
>    133     }
>    (gdb) p vcpu->state
>    $1 = EXECUTING
>    (gdb) p &vcpu->state
>    $2 = (vCPUState *) 0x555557c6b5d0
>    (gdb) watch *(vCPUState *) 0x555557c6b5d0
>    Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
>    (gdb) c
>    Continuing.
>    [Thread 0x7fffdfe006c0 (LWP 360544) exited]
>    [Thread 0x7fffe5e006c0 (LWP 360541) exited]
> 
>    Thread 12 "qemu-system-aar" hit Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
> 
>    Old value = EXECUTING
>    New value = IDLE
>    vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83
>    83      }
>    (gdb) c
>    Continuing.
>    [New Thread 0x7fffdfe006c0 (LWP 360591)]
>    [New Thread 0x7fffe5e006c0 (LWP 360592)]
>    [New Thread 0x7fffd34006c0 (LWP 360593)]
>    [Switching to Thread 0x7fffdfe006c0 (LWP 360591)]
> 
>    Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init (id=10457908569352202058, cpu_index=6) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>    (gdb)
>    Continuing.
>    [New Thread 0x7fffd2a006c0 (LWP 360594)]
>    **
>    ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>    Bail out! ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
> 
>    Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted.
>    [Switching to Thread 0x7fffd3e006c0 (LWP 360551)]
>    __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>    44      ./nptl/pthread_kill.c: No such file or directory.
>    (gdb) bt
>    #0  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>    #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
>    #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
>    #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
>    #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>    #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>    #6  0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144
>    #7  0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111
>    #8  0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) at ../../plugins/core.c:523
>    #9  0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at ../../system/cpus.c:465
>    #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at ../../accel/tcg/tcg-accel-ops-mttcg.c:118
>    #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at ../../util/qemu-thread-posix.c:541
>    #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
>    #13 0x00007ffff4d287dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
>    (gdb)
> 
> But I don't understand how we can ever hit the idle callback without
> first hitting the init callback.
> 

More exactly, the assert we hit means that the idle callback was not
called before the resume callback.
Any chance you can check what the current vcpu->state value is? I wonder
if it's an exited cpu that gets resumed afterwards.
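
One quick way to see it (rough sketch, untested) would be to print the state
in vcpu_resume() just before the assert:

    if (vcpu->state != IDLE) {
        /* log the unexpected state before the assert aborts */
        fprintf(stderr, "vcpu %u resumed while in state %d\n",
                cpu_index, (int) vcpu->state);
    }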

Thanks,
Pierrick

>> +    int64_t idle_time = now_ns() - vcpu->last_state_time;
>> +    /* accumulate expected number of instructions */
>> +    vcpu->counter += num_insn_during(idle_time);
>> +    vcpu_set_state(vcpu, EXECUTING);
>> +}
>> +
>> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
>> +{
>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>> +    vcpu_set_state(vcpu, FINISHED);
>> +    update_system_time(vcpu);
>> +    vcpu->counter = 0;
>> +}
>> +
>> +static void every_insn_quantum(unsigned int cpu_index, void *udata)
>> +{
>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>> +    g_assert(vcpu->track_insn >= insn_quantum);
>> +    update_system_time(vcpu);
>> +}
>> +
>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>> +{
>> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
>> +    qemu_plugin_u64 track_insn =
>> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, track_insn);
>> +    if (precise_execution) {
>> +        /* count (and eventually trap) on every instruction */
>> +        for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) {
>> +            struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx);
>> +            qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
>> +                insn, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, 1);
>> +            qemu_plugin_register_vcpu_insn_exec_cond_cb(
>> +                insn, every_insn_quantum,
>> +                QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>> +                track_insn, insn_quantum, NULL);
>> +        }
>> +    } else {
>> +        /* count (and eventually trap) once per tb */
>> +        qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>> +            tb, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, n_insns);
>> +        qemu_plugin_register_vcpu_tb_exec_cond_cb(
>> +            tb, every_insn_quantum,
>> +            QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>> +            track_insn, insn_quantum, NULL);
>> +    }
>> +}
>> +
>> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
>> +{
>> +    qemu_plugin_scoreboard_free(vcpus);
>> +}
>> +
>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>> +                                           const qemu_info_t *info, int argc,
>> +                                           char **argv)
>> +{
>> +    for (int i = 0; i < argc; i++) {
>> +        char *opt = argv[i];
>> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>> +        if (g_strcmp0(tokens[0], "ips") == 0) {
>> +            insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
>> +            if (!insn_per_second && errno) {
>> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
>> +                        __func__, tokens[1], g_strerror(errno));
>> +                return -1;
>> +            }
>> +
>> +        } else if (g_strcmp0(tokens[0], "precise") == 0) {
>> +            if (!qemu_plugin_bool_parse(tokens[0], tokens[1],
>> +                                        &precise_execution)) {
>> +                fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
>> +                return -1;
>> +            }
>> +        } else {
>> +            fprintf(stderr, "option parsing failed: %s\n", opt);
>> +            return -1;
>> +        }
>> +    }
>> +
>> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
>> +    insn_quantum = insn_per_second / NUM_TIME_UPDATE_PER_SEC;
>> +
>> +    time_handle = qemu_plugin_request_time_control();
>> +    g_assert(time_handle);
>> +
>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>> +    qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
>> +    qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume);
>> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>> +
>> +    return 0;
>> +}
>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>> index 0b64d2c1e3a..449ead11305 100644
>> --- a/contrib/plugins/Makefile
>> +++ b/contrib/plugins/Makefile
>> @@ -27,6 +27,7 @@ endif
>>   NAMES += hwprofile
>>   NAMES += cache
>>   NAMES += drcov
>> +NAMES += ips
>>   
>>   ifeq ($(CONFIG_WIN32),y)
>>   SO_SUFFIX := .dll
>
Alex Bennée May 28, 2024, 7:57 p.m. UTC | #3
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

> On 5/28/24 12:14, Alex Bennée wrote:
>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>> 
>>> This plugin uses the new time control interface to make decisions
>>> about the state of time during the emulation. The algorithm is
>>> currently very simple. The user specifies an ips rate which applies
>>> per core. If the core runs ahead of its allocated execution time the
>>> plugin sleeps for a bit to let real time catch up. Either way time is
>>> updated for the emulation as a function of total executed instructions
>>> with some adjustments for cores that idle.
>>>
>>> Examples
>>> --------
>>>
>>> Slow down execution of /bin/true:
>>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>>> real 4.000s
>>>
>>> Boot a Linux kernel simulating a 250MHz cpu:
>>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>>> check time until kernel panic on serial0
>>>
>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>> ---
>>>   contrib/plugins/ips.c    | 239 +++++++++++++++++++++++++++++++++++++++
>>>   contrib/plugins/Makefile |   1 +
>>>   2 files changed, 240 insertions(+)
>>>   create mode 100644 contrib/plugins/ips.c
>>>
>>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>>> new file mode 100644
>>> index 00000000000..cf3159df391
>>> --- /dev/null
>>> +++ b/contrib/plugins/ips.c
>>> @@ -0,0 +1,239 @@
>>> +/*
>>> + * ips rate limiting plugin.
>>> + *
>>> + * This plugin can be used to restrict the execution of a system to a
>>> + * particular number of Instructions Per Second (ips). This controls
>>> + * time as seen by the guest so while wall-clock time may be longer
>>> + * from the guests point of view time will pass at the normal rate.
>>> + *
>>> + * This uses the new plugin API which allows the plugin to control
>>> + * system time.
>>> + *
>>> + * Copyright (c) 2023 Linaro Ltd
>>> + *
>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>> + */
>>> +
>>> +#include <stdio.h>
>>> +#include <glib.h>
>>> +#include <qemu-plugin.h>
>>> +
>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>> +
>>> +/* how many times do we update time per sec */
>>> +#define NUM_TIME_UPDATE_PER_SEC 10
>>> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
>>> +
>>> +static GMutex global_state_lock;
>>> +
>>> +static uint64_t insn_per_second = 1000 * 1000; /* ips per core, per second */
>>> +static uint64_t insn_quantum; /* trap every N instructions */
>>> +static bool precise_execution; /* count every instruction */
>>> +static int64_t start_time_ns; /* time (ns since epoch) first vCPU started */
>>> +static int64_t virtual_time_ns; /* last set virtual time */
>>> +
>>> +static const void *time_handle;
>>> +
>>> +typedef enum {
>>> +    UNKNOWN = 0,
>>> +    EXECUTING,
>>> +    IDLE,
>>> +    FINISHED
>>> +} vCPUState;
>>> +
>>> +typedef struct {
>>> +    uint64_t counter;
>>> +    uint64_t track_insn;
>>> +    vCPUState state;
>>> +    /* timestamp when vCPU entered state */
>>> +    int64_t last_state_time;
>>> +} vCPUTime;
>>> +
>>> +struct qemu_plugin_scoreboard *vcpus;
>>> +
>>> +/* return epoch time in ns */
>>> +static int64_t now_ns(void)
>>> +{
>>> +    return g_get_real_time() * 1000;
>>> +}
>>> +
>>> +static uint64_t num_insn_during(int64_t elapsed_ns)
>>> +{
>>> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
>>> +    return num_secs * (double) insn_per_second;
>>> +}
>>> +
>>> +static int64_t time_for_insn(uint64_t num_insn)
>>> +{
>>> +    double num_secs = (double) num_insn / (double) insn_per_second;
>>> +    return num_secs * (double) NSEC_IN_ONE_SEC;
>>> +}
>>> +
>>> +static int64_t uptime_ns(void)
>>> +{
>>> +    int64_t now = now_ns();
>>> +    g_assert(now >= start_time_ns);
>>> +    return now - start_time_ns;
>>> +}
>>> +
>>> +static void vcpu_set_state(vCPUTime *vcpu, vCPUState new_state)
>>> +{
>>> +    vcpu->last_state_time = now_ns();
>>> +    vcpu->state = new_state;
>>> +}
>>> +
>>> +static void update_system_time(vCPUTime *vcpu)
>>> +{
>>> +    /* flush remaining instructions */
>>> +    vcpu->counter += vcpu->track_insn;
>>> +    vcpu->track_insn = 0;
>>> +
>>> +    int64_t uptime = uptime_ns();
>>> +    uint64_t expected_insn = num_insn_during(uptime);
>>> +
>>> +    if (vcpu->counter >= expected_insn) {
>>> +        /* this vcpu ran faster than expected, so it has to sleep */
>>> +        uint64_t insn_advance = vcpu->counter - expected_insn;
>>> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
>>> +        int64_t sleep_us = time_advance_ns / 1000;
>>> +        g_usleep(sleep_us);
>>> +    }
>>> +
>>> +    /* based on number of instructions, what should be the new time? */
>>> +    int64_t new_virtual_time = time_for_insn(vcpu->counter);
>>> +
>>> +    g_mutex_lock(&global_state_lock);
>>> +
>>> +    /* Time only moves forward. Another vcpu might have updated it already. */
>>> +    if (new_virtual_time > virtual_time_ns) {
>>> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
>>> +        virtual_time_ns = new_virtual_time;
>>> +    }
>>> +
>>> +    g_mutex_unlock(&global_state_lock);
>>> +}
>>> +
>>> +static void set_start_time()
>>> +{
>>> +    g_mutex_lock(&global_state_lock);
>>> +    if (!start_time_ns) {
>>> +        start_time_ns = now_ns();
>>> +    }
>>> +    g_mutex_unlock(&global_state_lock);
>>> +}
>>> +
>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    /* ensure start time is set first */
>>> +    set_start_time();
>>> +    /* start counter from absolute time reference */
>>> +    vcpu->counter = num_insn_during(uptime_ns());
>>> +    vcpu_set_state(vcpu, EXECUTING);
>>> +}
>>> +
>>> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    vcpu_set_state(vcpu, IDLE);
>>> +}
>>> +
>>> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    g_assert(vcpu->state == IDLE);
>> I'm triggering a weird race here:
>>    (gdb) b vcpu_init
>>    Breakpoint 1 at 0x7ffff7fa15f7: file /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127.
>>    (gdb) r
>>    The program being debugged has been started already.
>>    Start it from the beginning? (y or n) y
>>    Starting program:
>> /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64
>> -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars
>> -cpu cortex-a57 -smp 32 -accel tcg -device
>> virtio-net-pci,netdev=unet -device virtio-scsi-pci -device
>> scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22
>> -blockdev
>> driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap
>> -serial mon:stdio -blockdev
>> node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true
>> -blockdev
>> node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars
>> -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel
>> /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append
>> root=/dev/sda2\ console=ttyAMA0 -plugin
>> contrib/plugins/libips.so,ips=1000000000
>>    [Thread debugging using libthread_db enabled]
>>    Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>>    [New Thread 0x7fffe72006c0 (LWP 360538)]
>>    [New Thread 0x7fffe68006c0 (LWP 360540)]
>>    [New Thread 0x7fffe5e006c0 (LWP 360541)]
>>    [New Thread 0x7fffe54006c0 (LWP 360542)]
>>    [New Thread 0x7fffe4a006c0 (LWP 360543)]
>>    [New Thread 0x7fffdfe006c0 (LWP 360544)]
>>    [New Thread 0x7fffdf4006c0 (LWP 360545)]
>>    [New Thread 0x7fffdea006c0 (LWP 360546)]
>>    [Switching to Thread 0x7fffdf4006c0 (LWP 360545)]
>>    Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=0) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>    (gdb) c
>>    Continuing.
>>    [New Thread 0x7fffde0006c0 (LWP 360548)]
>>    [Switching to Thread 0x7fffdea006c0 (LWP 360546)]
>>    Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=1) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>    (gdb)
>>    Continuing.
>>    [New Thread 0x7fffdd6006c0 (LWP 360549)]
>>    [Switching to Thread 0x7fffde0006c0 (LWP 360548)]
>>    Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=2) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>    (gdb)
>>    Continuing.
>>    [New Thread 0x7fffdcc006c0 (LWP 360550)]
>>    [Switching to Thread 0x7fffdd6006c0 (LWP 360549)]
>>    Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=3) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>    (gdb)
>>    Continuing.
>>    [New Thread 0x7fffd3e006c0 (LWP 360551)]
>>    [Switching to Thread 0x7fffdcc006c0 (LWP 360550)]
>>    Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=4) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>    (gdb) n
>>    129         set_start_time();
>>    (gdb)
>>    131         vcpu->counter = num_insn_during(uptime_ns());
>>    (gdb)
>>    132         vcpu_set_state(vcpu, EXECUTING);
>>    (gdb)
>>    133     }
>>    (gdb) p vcpu->state
>>    $1 = EXECUTING
>>    (gdb) p &vcpu->state
>>    $2 = (vCPUState *) 0x555557c6b5d0
>>    (gdb) watch *(vCPUState *) 0x555557c6b5d0
>>    Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
>>    (gdb) c
>>    Continuing.
>>    [Thread 0x7fffdfe006c0 (LWP 360544) exited]
>>    [Thread 0x7fffe5e006c0 (LWP 360541) exited]
>>    Thread 12 "qemu-system-aar" hit Hardware watchpoint 2:
>> *(vCPUState *) 0x555557c6b5d0
>>    Old value = EXECUTING
>>    New value = IDLE
>>    vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83
>>    83      }
>>    (gdb) c
>>    Continuing.
>>    [New Thread 0x7fffdfe006c0 (LWP 360591)]
>>    [New Thread 0x7fffe5e006c0 (LWP 360592)]
>>    [New Thread 0x7fffd34006c0 (LWP 360593)]
>>    [Switching to Thread 0x7fffdfe006c0 (LWP 360591)]
>>    Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>> (id=10457908569352202058, cpu_index=6) at
>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>    127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>    (gdb)
>>    Continuing.
>>    [New Thread 0x7fffd2a006c0 (LWP 360594)]
>>    **
>>    ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>    Bail out! ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>    Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted.
>>    [Switching to Thread 0x7fffd3e006c0 (LWP 360551)]
>>    __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>    44      ./nptl/pthread_kill.c: No such file or directory.
>>    (gdb) bt
>>    #0  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>    #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
>>    #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
>>    #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
>>    #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>    #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>    #6  0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144
>>    #7  0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111
>>    #8  0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) at ../../plugins/core.c:523
>>    #9  0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at ../../system/cpus.c:465
>>    #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at ../../accel/tcg/tcg-accel-ops-mttcg.c:118
>>    #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at ../../util/qemu-thread-posix.c:541
>>    #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
>>    #13 0x00007ffff4d287dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
>>    (gdb)
>> But I don't understand how we can ever hit the idle callback without
>> first hitting the init callback.
>> 
>
> More exactly, the assert we hit means that the idle callback, was not
> called before the resume callback.
> Any chance you can check what is the current vcpu->state value? I
> wonder if it's not an exited cpu, that gets resumed after.

No, when I looked at it, it was set to UNKNOWN. Unfortunately it doesn't
trigger if I stick breakpoints in or run under rr.

>
> Thanks,
> Pierrick
>
>>> +    int64_t idle_time = now_ns() - vcpu->last_state_time;
>>> +    /* accumulate expected number of instructions */
>>> +    vcpu->counter += num_insn_during(idle_time);
>>> +    vcpu_set_state(vcpu, EXECUTING);
>>> +}
>>> +
>>> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    vcpu_set_state(vcpu, FINISHED);
>>> +    update_system_time(vcpu);
>>> +    vcpu->counter = 0;
>>> +}
>>> +
>>> +static void every_insn_quantum(unsigned int cpu_index, void *udata)
>>> +{
>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>> +    g_assert(vcpu->track_insn >= insn_quantum);
>>> +    update_system_time(vcpu);
>>> +}
>>> +
>>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>>> +{
>>> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
>>> +    qemu_plugin_u64 track_insn =
>>> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, track_insn);
>>> +    if (precise_execution) {
>>> +        /* count (and eventually trap) on every instruction */
>>> +        for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) {
>>> +            struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx);
>>> +            qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
>>> +                insn, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, 1);
>>> +            qemu_plugin_register_vcpu_insn_exec_cond_cb(
>>> +                insn, every_insn_quantum,
>>> +                QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>>> +                track_insn, insn_quantum, NULL);
>>> +        }
>>> +    } else {
>>> +        /* count (and eventually trap) once per tb */
>>> +        qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>>> +            tb, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, n_insns);
>>> +        qemu_plugin_register_vcpu_tb_exec_cond_cb(
>>> +            tb, every_insn_quantum,
>>> +            QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>>> +            track_insn, insn_quantum, NULL);
>>> +    }
>>> +}
>>> +
>>> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
>>> +{
>>> +    qemu_plugin_scoreboard_free(vcpus);
>>> +}
>>> +
>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>>> +                                           const qemu_info_t *info, int argc,
>>> +                                           char **argv)
>>> +{
>>> +    for (int i = 0; i < argc; i++) {
>>> +        char *opt = argv[i];
>>> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>>> +        if (g_strcmp0(tokens[0], "ips") == 0) {
>>> +            insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
>>> +            if (!insn_per_second && errno) {
>>> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
>>> +                        __func__, tokens[1], g_strerror(errno));
>>> +                return -1;
>>> +            }
>>> +
>>> +        } else if (g_strcmp0(tokens[0], "precise") == 0) {
>>> +            if (!qemu_plugin_bool_parse(tokens[0], tokens[1],
>>> +                                        &precise_execution)) {
>>> +                fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
>>> +                return -1;
>>> +            }
>>> +        } else {
>>> +            fprintf(stderr, "option parsing failed: %s\n", opt);
>>> +            return -1;
>>> +        }
>>> +    }
>>> +
>>> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
>>> +    insn_quantum = insn_per_second / NUM_TIME_UPDATE_PER_SEC;
>>> +
>>> +    time_handle = qemu_plugin_request_time_control();
>>> +    g_assert(time_handle);
>>> +
>>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>>> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>>> +    qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
>>> +    qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume);
>>> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
>>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>>> +
>>> +    return 0;
>>> +}
>>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>>> index 0b64d2c1e3a..449ead11305 100644
>>> --- a/contrib/plugins/Makefile
>>> +++ b/contrib/plugins/Makefile
>>> @@ -27,6 +27,7 @@ endif
>>>   NAMES += hwprofile
>>>   NAMES += cache
>>>   NAMES += drcov
>>> +NAMES += ips
>>>     ifeq ($(CONFIG_WIN32),y)
>>>   SO_SUFFIX := .dll
>>
Pierrick Bouvier May 28, 2024, 8:26 p.m. UTC | #4
On 5/28/24 12:57, Alex Bennée wrote:
> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
> 
>> On 5/28/24 12:14, Alex Bennée wrote:
>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>>
>>>> This plugin uses the new time control interface to make decisions
>>>> about the state of time during the emulation. The algorithm is
>>>> currently very simple. The user specifies an ips rate which applies
>>>> per core. If the core runs ahead of its allocated execution time the
>>>> plugin sleeps for a bit to let real time catch up. Either way time is
>>>> updated for the emulation as a function of total executed instructions
>>>> with some adjustments for cores that idle.
>>>>
>>>> Examples
>>>> --------
>>>>
>>>> Slow down execution of /bin/true:
>>>> $ num_insn=$(./build/qemu-x86_64 -plugin ./build/tests/plugin/libinsn.so -d plugin /bin/true |& grep total | sed -e 's/.*: //')
>>>> $ time ./build/qemu-x86_64 -plugin ./build/contrib/plugins/libips.so,ips=$(($num_insn/4)) /bin/true
>>>> real 4.000s
>>>>
>>>> Boot a Linux kernel simulating a 250MHz cpu:
>>>> $ /build/qemu-system-x86_64 -kernel /boot/vmlinuz-6.1.0-21-amd64 -append "console=ttyS0" -plugin ./build/contrib/plugins/libips.so,ips=$((250*1000*1000)) -smp 1 -m 512
>>>> check time until kernel panic on serial0
>>>>
>>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>>> ---
>>>>    contrib/plugins/ips.c    | 239 +++++++++++++++++++++++++++++++++++++++
>>>>    contrib/plugins/Makefile |   1 +
>>>>    2 files changed, 240 insertions(+)
>>>>    create mode 100644 contrib/plugins/ips.c
>>>>
>>>> diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
>>>> new file mode 100644
>>>> index 00000000000..cf3159df391
>>>> --- /dev/null
>>>> +++ b/contrib/plugins/ips.c
>>>> @@ -0,0 +1,239 @@
>>>> +/*
>>>> + * ips rate limiting plugin.
>>>> + *
>>>> + * This plugin can be used to restrict the execution of a system to a
>>>> + * particular number of Instructions Per Second (ips). This controls
>>>> + * time as seen by the guest so while wall-clock time may be longer
>>>> + * from the guests point of view time will pass at the normal rate.
>>>> + *
>>>> + * This uses the new plugin API which allows the plugin to control
>>>> + * system time.
>>>> + *
>>>> + * Copyright (c) 2023 Linaro Ltd
>>>> + *
>>>> + * SPDX-License-Identifier: GPL-2.0-or-later
>>>> + */
>>>> +
>>>> +#include <stdio.h>
>>>> +#include <glib.h>
>>>> +#include <qemu-plugin.h>
>>>> +
>>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>>>> +
>>>> +/* how many times do we update time per sec */
>>>> +#define NUM_TIME_UPDATE_PER_SEC 10
>>>> +#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
>>>> +
>>>> +static GMutex global_state_lock;
>>>> +
>>>> +static uint64_t insn_per_second = 1000 * 1000; /* ips per core, per second */
>>>> +static uint64_t insn_quantum; /* trap every N instructions */
>>>> +static bool precise_execution; /* count every instruction */
>>>> +static int64_t start_time_ns; /* time (ns since epoch) first vCPU started */
>>>> +static int64_t virtual_time_ns; /* last set virtual time */
>>>> +
>>>> +static const void *time_handle;
>>>> +
>>>> +typedef enum {
>>>> +    UNKNOWN = 0,
>>>> +    EXECUTING,
>>>> +    IDLE,
>>>> +    FINISHED
>>>> +} vCPUState;
>>>> +
>>>> +typedef struct {
>>>> +    uint64_t counter;
>>>> +    uint64_t track_insn;
>>>> +    vCPUState state;
>>>> +    /* timestamp when vCPU entered state */
>>>> +    int64_t last_state_time;
>>>> +} vCPUTime;
>>>> +
>>>> +struct qemu_plugin_scoreboard *vcpus;
>>>> +
>>>> +/* return epoch time in ns */
>>>> +static int64_t now_ns(void)
>>>> +{
>>>> +    return g_get_real_time() * 1000;
>>>> +}
>>>> +
>>>> +static uint64_t num_insn_during(int64_t elapsed_ns)
>>>> +{
>>>> +    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
>>>> +    return num_secs * (double) insn_per_second;
>>>> +}
>>>> +
>>>> +static int64_t time_for_insn(uint64_t num_insn)
>>>> +{
>>>> +    double num_secs = (double) num_insn / (double) insn_per_second;
>>>> +    return num_secs * (double) NSEC_IN_ONE_SEC;
>>>> +}
>>>> +
>>>> +static int64_t uptime_ns(void)
>>>> +{
>>>> +    int64_t now = now_ns();
>>>> +    g_assert(now >= start_time_ns);
>>>> +    return now - start_time_ns;
>>>> +}
>>>> +
>>>> +static void vcpu_set_state(vCPUTime *vcpu, vCPUState new_state)
>>>> +{
>>>> +    vcpu->last_state_time = now_ns();
>>>> +    vcpu->state = new_state;
>>>> +}
>>>> +
>>>> +static void update_system_time(vCPUTime *vcpu)
>>>> +{
>>>> +    /* flush remaining instructions */
>>>> +    vcpu->counter += vcpu->track_insn;
>>>> +    vcpu->track_insn = 0;
>>>> +
>>>> +    int64_t uptime = uptime_ns();
>>>> +    uint64_t expected_insn = num_insn_during(uptime);
>>>> +
>>>> +    if (vcpu->counter >= expected_insn) {
>>>> +        /* this vcpu ran faster than expected, so it has to sleep */
>>>> +        uint64_t insn_advance = vcpu->counter - expected_insn;
>>>> +        uint64_t time_advance_ns = time_for_insn(insn_advance);
>>>> +        int64_t sleep_us = time_advance_ns / 1000;
>>>> +        g_usleep(sleep_us);
>>>> +    }
>>>> +
>>>> +    /* based on number of instructions, what should be the new time? */
>>>> +    int64_t new_virtual_time = time_for_insn(vcpu->counter);
>>>> +
>>>> +    g_mutex_lock(&global_state_lock);
>>>> +
>>>> +    /* Time only moves forward. Another vcpu might have updated it already. */
>>>> +    if (new_virtual_time > virtual_time_ns) {
>>>> +        qemu_plugin_update_ns(time_handle, new_virtual_time);
>>>> +        virtual_time_ns = new_virtual_time;
>>>> +    }
>>>> +
>>>> +    g_mutex_unlock(&global_state_lock);
>>>> +}
>>>> +
>>>> +static void set_start_time()
>>>> +{
>>>> +    g_mutex_lock(&global_state_lock);
>>>> +    if (!start_time_ns) {
>>>> +        start_time_ns = now_ns();
>>>> +    }
>>>> +    g_mutex_unlock(&global_state_lock);
>>>> +}
>>>> +
>>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>>>> +{
>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>> +    /* ensure start time is set first */
>>>> +    set_start_time();
>>>> +    /* start counter from absolute time reference */
>>>> +    vcpu->counter = num_insn_during(uptime_ns());
>>>> +    vcpu_set_state(vcpu, EXECUTING);
>>>> +}
>>>> +
>>>> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
>>>> +{
>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>> +    vcpu_set_state(vcpu, IDLE);
>>>> +}
>>>> +
>>>> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
>>>> +{
>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>> +    g_assert(vcpu->state == IDLE);
>>> I'm triggering a weird race here:
>>>     (gdb) b vcpu_init
>>>     Breakpoint 1 at 0x7ffff7fa15f7: file /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127.
>>>     (gdb) r
>>>     The program being debugged has been started already.
>>>     Start it from the beginning? (y or n) y
>>>     Starting program:
>>> /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64
>>> -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars
>>> -cpu cortex-a57 -smp 32 -accel tcg -device
>>> virtio-net-pci,netdev=unet -device virtio-scsi-pci -device
>>> scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22
>>> -blockdev
>>> driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap
>>> -serial mon:stdio -blockdev
>>> node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true
>>> -blockdev
>>> node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars
>>> -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel
>>> /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append
>>> root=/dev/sda2\ console=ttyAMA0 -plugin
>>> contrib/plugins/libips.so,ips=1000000000
>>>     [Thread debugging using libthread_db enabled]
>>>     Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>>>     [New Thread 0x7fffe72006c0 (LWP 360538)]
>>>     [New Thread 0x7fffe68006c0 (LWP 360540)]
>>>     [New Thread 0x7fffe5e006c0 (LWP 360541)]
>>>     [New Thread 0x7fffe54006c0 (LWP 360542)]
>>>     [New Thread 0x7fffe4a006c0 (LWP 360543)]
>>>     [New Thread 0x7fffdfe006c0 (LWP 360544)]
>>>     [New Thread 0x7fffdf4006c0 (LWP 360545)]
>>>     [New Thread 0x7fffdea006c0 (LWP 360546)]
>>>     [Switching to Thread 0x7fffdf4006c0 (LWP 360545)]
>>>     Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>> (id=10457908569352202058, cpu_index=0) at
>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>     (gdb) c
>>>     Continuing.
>>>     [New Thread 0x7fffde0006c0 (LWP 360548)]
>>>     [Switching to Thread 0x7fffdea006c0 (LWP 360546)]
>>>     Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>> (id=10457908569352202058, cpu_index=1) at
>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>     (gdb)
>>>     Continuing.
>>>     [New Thread 0x7fffdd6006c0 (LWP 360549)]
>>>     [Switching to Thread 0x7fffde0006c0 (LWP 360548)]
>>>     Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>> (id=10457908569352202058, cpu_index=2) at
>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>     (gdb)
>>>     Continuing.
>>>     [New Thread 0x7fffdcc006c0 (LWP 360550)]
>>>     [Switching to Thread 0x7fffdd6006c0 (LWP 360549)]
>>>     Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>> (id=10457908569352202058, cpu_index=3) at
>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>     (gdb)
>>>     Continuing.
>>>     [New Thread 0x7fffd3e006c0 (LWP 360551)]
>>>     [Switching to Thread 0x7fffdcc006c0 (LWP 360550)]
>>>     Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>> (id=10457908569352202058, cpu_index=4) at
>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>     (gdb) n
>>>     129         set_start_time();
>>>     (gdb)
>>>     131         vcpu->counter = num_insn_during(uptime_ns());
>>>     (gdb)
>>>     132         vcpu_set_state(vcpu, EXECUTING);
>>>     (gdb)
>>>     133     }
>>>     (gdb) p vcpu->state
>>>     $1 = EXECUTING
>>>     (gdb) p &vcpu->state
>>>     $2 = (vCPUState *) 0x555557c6b5d0
>>>     (gdb) watch *(vCPUState *) 0x555557c6b5d0
>>>     Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
>>>     (gdb) c
>>>     Continuing.
>>>     [Thread 0x7fffdfe006c0 (LWP 360544) exited]
>>>     [Thread 0x7fffe5e006c0 (LWP 360541) exited]
>>>     Thread 12 "qemu-system-aar" hit Hardware watchpoint 2:
>>> *(vCPUState *) 0x555557c6b5d0
>>>     Old value = EXECUTING
>>>     New value = IDLE
>>>     vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83
>>>     83      }
>>>     (gdb) c
>>>     Continuing.
>>>     [New Thread 0x7fffdfe006c0 (LWP 360591)]
>>>     [New Thread 0x7fffe5e006c0 (LWP 360592)]
>>>     [New Thread 0x7fffd34006c0 (LWP 360593)]
>>>     [Switching to Thread 0x7fffdfe006c0 (LWP 360591)]
>>>     Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>> (id=10457908569352202058, cpu_index=6) at
>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>     (gdb)
>>>     Continuing.
>>>     [New Thread 0x7fffd2a006c0 (LWP 360594)]
>>>     **
>>>     ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>>     Bail out! ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>>     Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted.
>>>     [Switching to Thread 0x7fffd3e006c0 (LWP 360551)]
>>>     __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>>     44      ./nptl/pthread_kill.c: No such file or directory.
>>>     (gdb) bt
>>>     #0  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>>     #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
>>>     #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
>>>     #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
>>>     #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>>     #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>>     #6  0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144
>>>     #7  0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111
>>>     #8  0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) at ../../plugins/core.c:523
>>>     #9  0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at ../../system/cpus.c:465
>>>     #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at ../../accel/tcg/tcg-accel-ops-mttcg.c:118
>>>     #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at ../../util/qemu-thread-posix.c:541
>>>     #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
>>>     #13 0x00007ffff4d287dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
>>>     (gdb)
>>> But I don't understand how we can ever hit the idle callback without
>>> first hitting the init callback.
>>>
>>
>> More exactly, the assert we hit means that the idle callback was not
>> called before the resume callback.
>> Any chance you can check what the current vcpu->state value is? I
>> wonder if it's an exited cpu that gets resumed afterwards.
> 
> No, when I looked at it it was set to UNKNOWN. Unfortunately it doesn't
> trigger if I stick breakpoints in or run under rr.
>

By using rr record --chaos mode maybe?
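
Something like this (assuming rr is installed, and reusing the same command
line as above) should do it:

  $ rr record --chaos ./qemu-system-aarch64 ... -plugin contrib/plugins/libips.so,ips=1000000000
  $ rr replay

--chaos randomises the scheduling, which tends to shake out this kind of
ordering-dependent assert, and the recording can then be replayed
deterministically.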

>>
>> Thanks,
>> Pierrick
>>
>>>> +    int64_t idle_time = now_ns() - vcpu->last_state_time;
>>>> +    /* accumulate expected number of instructions */
>>>> +    vcpu->counter += num_insn_during(idle_time);
>>>> +    vcpu_set_state(vcpu, EXECUTING);
>>>> +}
>>>> +
>>>> +static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
>>>> +{
>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>> +    vcpu_set_state(vcpu, FINISHED);
>>>> +    update_system_time(vcpu);
>>>> +    vcpu->counter = 0;
>>>> +}
>>>> +
>>>> +static void every_insn_quantum(unsigned int cpu_index, void *udata)
>>>> +{
>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>> +    g_assert(vcpu->track_insn >= insn_quantum);
>>>> +    update_system_time(vcpu);
>>>> +}
>>>> +
>>>> +static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>>>> +{
>>>> +    size_t n_insns = qemu_plugin_tb_n_insns(tb);
>>>> +    qemu_plugin_u64 track_insn =
>>>> +        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, track_insn);
>>>> +    if (precise_execution) {
>>>> +        /* count (and eventually trap) on every instruction */
>>>> +        for (int idx = 0; idx < qemu_plugin_tb_n_insns(tb); ++idx) {
>>>> +            struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx);
>>>> +            qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
>>>> +                insn, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, 1);
>>>> +            qemu_plugin_register_vcpu_insn_exec_cond_cb(
>>>> +                insn, every_insn_quantum,
>>>> +                QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>>>> +                track_insn, insn_quantum, NULL);
>>>> +        }
>>>> +    } else {
>>>> +        /* count (and eventually trap) once per tb */
>>>> +        qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
>>>> +            tb, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, n_insns);
>>>> +        qemu_plugin_register_vcpu_tb_exec_cond_cb(
>>>> +            tb, every_insn_quantum,
>>>> +            QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
>>>> +            track_insn, insn_quantum, NULL);
>>>> +    }
>>>> +}
>>>> +
>>>> +static void plugin_exit(qemu_plugin_id_t id, void *udata)
>>>> +{
>>>> +    qemu_plugin_scoreboard_free(vcpus);
>>>> +}
>>>> +
>>>> +QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>>>> +                                           const qemu_info_t *info, int argc,
>>>> +                                           char **argv)
>>>> +{
>>>> +    for (int i = 0; i < argc; i++) {
>>>> +        char *opt = argv[i];
>>>> +        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
>>>> +        if (g_strcmp0(tokens[0], "ips") == 0) {
>>>> +            insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
>>>> +            if (!insn_per_second && errno) {
>>>> +                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
>>>> +                        __func__, tokens[1], g_strerror(errno));
>>>> +                return -1;
>>>> +            }
>>>> +
>>>> +        } else if (g_strcmp0(tokens[0], "precise") == 0) {
>>>> +            if (!qemu_plugin_bool_parse(tokens[0], tokens[1],
>>>> +                                        &precise_execution)) {
>>>> +                fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
>>>> +                return -1;
>>>> +            }
>>>> +        } else {
>>>> +            fprintf(stderr, "option parsing failed: %s\n", opt);
>>>> +            return -1;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
>>>> +    insn_quantum = insn_per_second / NUM_TIME_UPDATE_PER_SEC;
>>>> +
>>>> +    time_handle = qemu_plugin_request_time_control();
>>>> +    g_assert(time_handle);
>>>> +
>>>> +    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>>>> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>>>> +    qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
>>>> +    qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume);
>>>> +    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
>>>> +    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
>>>> index 0b64d2c1e3a..449ead11305 100644
>>>> --- a/contrib/plugins/Makefile
>>>> +++ b/contrib/plugins/Makefile
>>>> @@ -27,6 +27,7 @@ endif
>>>>    NAMES += hwprofile
>>>>    NAMES += cache
>>>>    NAMES += drcov
>>>> +NAMES += ips
>>>>      ifeq ($(CONFIG_WIN32),y)
>>>>    SO_SUFFIX := .dll
>>>
>
Alex Bennée May 29, 2024, 12:13 p.m. UTC | #5
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

(Added Philippe to CC)

> On 5/28/24 12:57, Alex Bennée wrote:
>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>> 
>>> On 5/28/24 12:14, Alex Bennée wrote:
>>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>>>
>>>>> This plugin uses the new time control interface to make decisions
>>>>> about the state of time during the emulation. The algorithm is
>>>>> currently very simple. The user specifies an ips rate which applies
>>>>> per core.

<snip>

>>>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>> +{
>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>> +    /* ensure start time is set first */
>>>>> +    set_start_time();
>>>>> +    /* start counter from absolute time reference */
>>>>> +    vcpu->counter = num_insn_during(uptime_ns());
>>>>> +    vcpu_set_state(vcpu, EXECUTING);
>>>>> +}
>>>>> +
>>>>> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>> +{
>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>> +    vcpu_set_state(vcpu, IDLE);
>>>>> +}
>>>>> +
>>>>> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>> +{
>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>> +    g_assert(vcpu->state == IDLE);
>>>> I'm triggering a weird race here:
>>>>     (gdb) b vcpu_init
>>>>     Breakpoint 1 at 0x7ffff7fa15f7: file /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127.
>>>>     (gdb) r
>>>>     The program being debugged has been started already.
>>>>     Start it from the beginning? (y or n) y
>>>>     Starting program:
>>>> /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64
>>>> -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars
>>>> -cpu cortex-a57 -smp 32 -accel tcg -device
>>>> virtio-net-pci,netdev=unet -device virtio-scsi-pci -device
>>>> scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22
>>>> -blockdev
>>>> driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap
>>>> -serial mon:stdio -blockdev
>>>> node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true
>>>> -blockdev
>>>> node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars
>>>> -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel
>>>> /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append
>>>> root=/dev/sda2\ console=ttyAMA0 -plugin
>>>> contrib/plugins/libips.so,ips=1000000000
>>>>     [Thread debugging using libthread_db enabled]
>>>>     Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>>>>     [New Thread 0x7fffe72006c0 (LWP 360538)]
>>>>     [New Thread 0x7fffe68006c0 (LWP 360540)]
>>>>     [New Thread 0x7fffe5e006c0 (LWP 360541)]
>>>>     [New Thread 0x7fffe54006c0 (LWP 360542)]
>>>>     [New Thread 0x7fffe4a006c0 (LWP 360543)]
>>>>     [New Thread 0x7fffdfe006c0 (LWP 360544)]
>>>>     [New Thread 0x7fffdf4006c0 (LWP 360545)]
>>>>     [New Thread 0x7fffdea006c0 (LWP 360546)]
>>>>     [Switching to Thread 0x7fffdf4006c0 (LWP 360545)]
>>>>     Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>> (id=10457908569352202058, cpu_index=0) at
>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>     (gdb) c
>>>>     Continuing.
>>>>     [New Thread 0x7fffde0006c0 (LWP 360548)]
>>>>     [Switching to Thread 0x7fffdea006c0 (LWP 360546)]
>>>>     Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>> (id=10457908569352202058, cpu_index=1) at
>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>     (gdb)
>>>>     Continuing.
>>>>     [New Thread 0x7fffdd6006c0 (LWP 360549)]
>>>>     [Switching to Thread 0x7fffde0006c0 (LWP 360548)]
>>>>     Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>> (id=10457908569352202058, cpu_index=2) at
>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>     (gdb)
>>>>     Continuing.
>>>>     [New Thread 0x7fffdcc006c0 (LWP 360550)]
>>>>     [Switching to Thread 0x7fffdd6006c0 (LWP 360549)]
>>>>     Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>> (id=10457908569352202058, cpu_index=3) at
>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>     (gdb)
>>>>     Continuing.
>>>>     [New Thread 0x7fffd3e006c0 (LWP 360551)]
>>>>     [Switching to Thread 0x7fffdcc006c0 (LWP 360550)]
>>>>     Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>> (id=10457908569352202058, cpu_index=4) at
>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>     (gdb) n
>>>>     129         set_start_time();
>>>>     (gdb)
>>>>     131         vcpu->counter = num_insn_during(uptime_ns());
>>>>     (gdb)
>>>>     132         vcpu_set_state(vcpu, EXECUTING);
>>>>     (gdb)
>>>>     133     }
>>>>     (gdb) p vcpu->state
>>>>     $1 = EXECUTING
>>>>     (gdb) p &vcpu->state
>>>>     $2 = (vCPUState *) 0x555557c6b5d0
>>>>     (gdb) watch *(vCPUState *) 0x555557c6b5d0
>>>>     Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
>>>>     (gdb) c
>>>>     Continuing.
>>>>     [Thread 0x7fffdfe006c0 (LWP 360544) exited]
>>>>     [Thread 0x7fffe5e006c0 (LWP 360541) exited]
>>>>     Thread 12 "qemu-system-aar" hit Hardware watchpoint 2:
>>>> *(vCPUState *) 0x555557c6b5d0
>>>>     Old value = EXECUTING
>>>>     New value = IDLE
>>>>     vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83
>>>>     83      }
>>>>     (gdb) c
>>>>     Continuing.
>>>>     [New Thread 0x7fffdfe006c0 (LWP 360591)]
>>>>     [New Thread 0x7fffe5e006c0 (LWP 360592)]
>>>>     [New Thread 0x7fffd34006c0 (LWP 360593)]
>>>>     [Switching to Thread 0x7fffdfe006c0 (LWP 360591)]
>>>>     Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>> (id=10457908569352202058, cpu_index=6) at
>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>     127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>     (gdb)
>>>>     Continuing.
>>>>     [New Thread 0x7fffd2a006c0 (LWP 360594)]
>>>>     **
>>>>     ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>>>     Bail out! ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>>>     Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted.
>>>>     [Switching to Thread 0x7fffd3e006c0 (LWP 360551)]
>>>>     __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>>>     44      ./nptl/pthread_kill.c: No such file or directory.
>>>>     (gdb) bt
>>>>     #0  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>>>     #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
>>>>     #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
>>>>     #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
>>>>     #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>>>     #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>>>     #6  0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144
>>>>     #7  0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111
>>>>     #8  0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) at ../../plugins/core.c:523
>>>>     #9  0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at ../../system/cpus.c:465
>>>>     #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at ../../accel/tcg/tcg-accel-ops-mttcg.c:118
>>>>     #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at ../../util/qemu-thread-posix.c:541
>>>>     #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
>>>>     #13 0x00007ffff4d287dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
>>>>     (gdb)
>>>> But I don't understand how we can ever hit the idle callback without
>>>> first hitting the init callback.
>>>>
>>>
>>> More exactly, the assert we hit means that the idle callback was not
>>> called before the resume callback.
>>> Any chance you can check what the current vcpu->state value is? I
>>> wonder if it's an exited cpu that gets resumed afterwards.
>> No, when I looked at it it was set to UNKNOWN. Unfortunately it doesn't
>> trigger if I stick breakpoints in or run under rr.
>>
>
> By using rr record --chaos mode maybe?

Ahh yes that triggered it.

It looks like we have a race:

  (rr) c
  Continuing.

  Thread 1 hit Hardware watchpoint 7: *(int *) 0x5559ba6899b0

  Old value = -1
  New value = 10
  machvirt_init (machine=0x5559b9ac3c00) at ../../hw/arm/virt.c:2214
  2214            numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj),
  (rr) c
  Continuing.
  [New Thread 650125.650142]
  [Switching to Thread 650125.650142]

  Thread 44 hit Breakpoint 8, mttcg_cpu_thread_fn (arg=0x5559ba6896e0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:70
  70          assert(tcg_enabled());
  (rr) p cpu->cpu_index
  $24 = 10
  (rr) c
  Continuing.
  [Switching to Thread 650125.650125]

  Thread 1 hit Breakpoint 10, cpu_common_realizefn (dev=0x5559ba6896e0, errp=0x7fff02322c10) at ../../hw/core/cpu-common.c:205
  205         Object *machine = qdev_get_machine();
  (rr) p cpu->cpu_index
  $25 = 10
  (rr) bt
  #0  cpu_common_realizefn (dev=0x5559ba6896e0, errp=0x7fff02322c10) at ../../hw/core/cpu-common.c:205
  #1  0x00005559b606e6a5 in arm_cpu_realizefn (dev=0x5559ba6896e0, errp=0x7fff02322c10) at ../../target/arm/cpu.c:2552
  #2  0x00005559b63dd5d0 in device_set_realized (obj=0x5559ba6896e0, value=true, errp=0x7fff02322d20) at ../../hw/core/qdev.c:510
  #3  0x00005559b63e84e7 in property_set_bool (obj=0x5559ba6896e0, v=0x5559ba681540, name=0x5559b68fafe1 "realized", opaque=0x5559b97f73c0, errp=0x7fff02322d20)
      at ../../qom/object.c:2354
  #4  0x00005559b63e6065 in object_property_set (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", v=0x5559ba681540, errp=0x7fff02322d20) at ../../qom/object.c:1463
  #5  0x00005559b63ead1d in object_property_set_qobject (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", value=0x5559ba681420, errp=0x5559b7810320 <error_fatal>)
      at ../../qom/qom-qobject.c:28
  #6  0x00005559b63e640a in object_property_set_bool (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", value=true, errp=0x5559b7810320 <error_fatal>)
      at ../../qom/object.c:1533
  #7  0x00005559b63dccbd in qdev_realize (dev=0x5559ba6896e0, bus=0x0, errp=0x5559b7810320 <error_fatal>) at ../../hw/core/qdev.c:291
  #8  0x00005559b5fe4243 in machvirt_init (machine=0x5559b9ac3c00) at ../../hw/arm/virt.c:2295
  #9  0x00005559b5ada5d6 in machine_run_board_init (machine=0x5559b9ac3c00, mem_path=0x0, errp=0x7fff02322f30) at ../../hw/core/machine.c:1576
  #10 0x00005559b5eb416f in qemu_init_board () at ../../system/vl.c:2621
  #11 0x00005559b5eb4482 in qmp_x_exit_preconfig (errp=0x5559b7810320 <error_fatal>) at ../../system/vl.c:2713
  #12 0x00005559b5eb7088 in qemu_init (argc=35, argv=0x7fff02323268) at ../../system/vl.c:3759
  #13 0x00005559b63d7ff2 in main (argc=35, argv=0x7fff02323268) at ../../system/main.c:47

Because looking at the order things happen in the specific CPU realisation:

    qemu_init_vcpu(cs);
    cpu_reset(cs);

    acc->parent_realize(dev, errp);

And the common realize function doesn't get to do:

    /* Plugin initialization must wait until the cpu start executing code */
#ifdef CONFIG_PLUGIN
    if (tcg_enabled()) {
        cpu->plugin_state = qemu_plugin_create_vcpu_state();
        async_run_on_cpu(cpu, qemu_plugin_vcpu_init__async, RUN_ON_CPU_NULL);
    }
#endif

before we've started executing.....
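
A plugin-side band-aid -- just a sketch reusing the helpers already defined in
ips.c above, not a substitute for fixing the ordering in core -- would be to
tolerate a resume that arrives before the deferred init callback:

    static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
    {
        vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);

        if (vcpu->state == UNKNOWN) {
            /* init callback not delivered yet: treat this as a late init */
            set_start_time();
            vcpu->counter = num_insn_during(uptime_ns());
            vcpu_set_state(vcpu, EXECUTING);
            return;
        }

        g_assert(vcpu->state == IDLE);
        int64_t idle_time = now_ns() - vcpu->last_state_time;
        /* accumulate expected number of instructions spent idle */
        vcpu->counter += num_insn_during(idle_time);
        vcpu_set_state(vcpu, EXECUTING);
    }

That only hides the symptom in the plugin though; the init/resume ordering in
realize still looks wrong.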
Pierrick Bouvier May 29, 2024, 2:33 p.m. UTC | #6
On 5/29/24 05:13, Alex Bennée wrote:
> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
> 
> (Added Philippe to CC)
> 
>> On 5/28/24 12:57, Alex Bennée wrote:
>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>>
>>>> On 5/28/24 12:14, Alex Bennée wrote:
>>>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>>>>
>>>>>> This plugin uses the new time control interface to make decisions
>>>>>> about the state of time during the emulation. The algorithm is
>>>>>> currently very simple. The user specifies an ips rate which applies
>>>>>> per core.
> 
> <snip>
> 
>>>>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>>> +{
>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>> +    /* ensure start time is set first */
>>>>>> +    set_start_time();
>>>>>> +    /* start counter from absolute time reference */
>>>>>> +    vcpu->counter = num_insn_during(uptime_ns());
>>>>>> +    vcpu_set_state(vcpu, EXECUTING);
>>>>>> +}
>>>>>> +
>>>>>> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>>> +{
>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>> +    vcpu_set_state(vcpu, IDLE);
>>>>>> +}
>>>>>> +
>>>>>> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>>> +{
>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>> +    g_assert(vcpu->state == IDLE);
>>>>> I'm triggering a weird race here:
>>>>>      (gdb) b vcpu_init
>>>>>      Breakpoint 1 at 0x7ffff7fa15f7: file /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127.
>>>>>      (gdb) r
>>>>>      The program being debugged has been started already.
>>>>>      Start it from the beginning? (y or n) y
>>>>>      Starting program:
>>>>> /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64
>>>>> -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars
>>>>> -cpu cortex-a57 -smp 32 -accel tcg -device
>>>>> virtio-net-pci,netdev=unet -device virtio-scsi-pci -device
>>>>> scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22
>>>>> -blockdev
>>>>> driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap
>>>>> -serial mon:stdio -blockdev
>>>>> node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true
>>>>> -blockdev
>>>>> node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars
>>>>> -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel
>>>>> /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append
>>>>> root=/dev/sda2\ console=ttyAMA0 -plugin
>>>>> contrib/plugins/libips.so,ips=1000000000
>>>>>      [Thread debugging using libthread_db enabled]
>>>>>      Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>>>>>      [New Thread 0x7fffe72006c0 (LWP 360538)]
>>>>>      [New Thread 0x7fffe68006c0 (LWP 360540)]
>>>>>      [New Thread 0x7fffe5e006c0 (LWP 360541)]
>>>>>      [New Thread 0x7fffe54006c0 (LWP 360542)]
>>>>>      [New Thread 0x7fffe4a006c0 (LWP 360543)]
>>>>>      [New Thread 0x7fffdfe006c0 (LWP 360544)]
>>>>>      [New Thread 0x7fffdf4006c0 (LWP 360545)]
>>>>>      [New Thread 0x7fffdea006c0 (LWP 360546)]
>>>>>      [Switching to Thread 0x7fffdf4006c0 (LWP 360545)]
>>>>>      Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>> (id=10457908569352202058, cpu_index=0) at
>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>      (gdb) c
>>>>>      Continuing.
>>>>>      [New Thread 0x7fffde0006c0 (LWP 360548)]
>>>>>      [Switching to Thread 0x7fffdea006c0 (LWP 360546)]
>>>>>      Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>> (id=10457908569352202058, cpu_index=1) at
>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>      (gdb)
>>>>>      Continuing.
>>>>>      [New Thread 0x7fffdd6006c0 (LWP 360549)]
>>>>>      [Switching to Thread 0x7fffde0006c0 (LWP 360548)]
>>>>>      Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>> (id=10457908569352202058, cpu_index=2) at
>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>      (gdb)
>>>>>      Continuing.
>>>>>      [New Thread 0x7fffdcc006c0 (LWP 360550)]
>>>>>      [Switching to Thread 0x7fffdd6006c0 (LWP 360549)]
>>>>>      Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>> (id=10457908569352202058, cpu_index=3) at
>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>      (gdb)
>>>>>      Continuing.
>>>>>      [New Thread 0x7fffd3e006c0 (LWP 360551)]
>>>>>      [Switching to Thread 0x7fffdcc006c0 (LWP 360550)]
>>>>>      Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>> (id=10457908569352202058, cpu_index=4) at
>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>      (gdb) n
>>>>>      129         set_start_time();
>>>>>      (gdb)
>>>>>      131         vcpu->counter = num_insn_during(uptime_ns());
>>>>>      (gdb)
>>>>>      132         vcpu_set_state(vcpu, EXECUTING);
>>>>>      (gdb)
>>>>>      133     }
>>>>>      (gdb) p vcpu->state
>>>>>      $1 = EXECUTING
>>>>>      (gdb) p &vcpu->state
>>>>>      $2 = (vCPUState *) 0x555557c6b5d0
>>>>>      (gdb) watch *(vCPUState *) 0x555557c6b5d0
>>>>>      Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
>>>>>      (gdb) c
>>>>>      Continuing.
>>>>>      [Thread 0x7fffdfe006c0 (LWP 360544) exited]
>>>>>      [Thread 0x7fffe5e006c0 (LWP 360541) exited]
>>>>>      Thread 12 "qemu-system-aar" hit Hardware watchpoint 2:
>>>>> *(vCPUState *) 0x555557c6b5d0
>>>>>      Old value = EXECUTING
>>>>>      New value = IDLE
>>>>>      vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83
>>>>>      83      }
>>>>>      (gdb) c
>>>>>      Continuing.
>>>>>      [New Thread 0x7fffdfe006c0 (LWP 360591)]
>>>>>      [New Thread 0x7fffe5e006c0 (LWP 360592)]
>>>>>      [New Thread 0x7fffd34006c0 (LWP 360593)]
>>>>>      [Switching to Thread 0x7fffdfe006c0 (LWP 360591)]
>>>>>      Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>> (id=10457908569352202058, cpu_index=6) at
>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>      (gdb)
>>>>>      Continuing.
>>>>>      [New Thread 0x7fffd2a006c0 (LWP 360594)]
>>>>>      **
>>>>>      ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>>>>      Bail out! ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>>>>      Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted.
>>>>>      [Switching to Thread 0x7fffd3e006c0 (LWP 360551)]
>>>>>      __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>>>>      44      ./nptl/pthread_kill.c: No such file or directory.
>>>>>      (gdb) bt
>>>>>      #0  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>>>>      #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
>>>>>      #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
>>>>>      #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
>>>>>      #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>>>>      #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>>>>      #6  0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144
>>>>>      #7  0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111
>>>>>      #8  0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) at ../../plugins/core.c:523
>>>>>      #9  0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at ../../system/cpus.c:465
>>>>>      #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at ../../accel/tcg/tcg-accel-ops-mttcg.c:118
>>>>>      #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at ../../util/qemu-thread-posix.c:541
>>>>>      #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
>>>>>      #13 0x00007ffff4d287dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
>>>>>      (gdb)
>>>>> But I don't understand how we can ever hit the idle callback without
>>>>> first hitting the init callback.
>>>>>
>>>>
>>>> More exactly, the assert we hit means that the idle callback was not
>>>> called before the resume callback.
>>>> Any chance you can check what the current vcpu->state value is? I
>>>> wonder if it's an exited cpu that gets resumed afterwards.
>>> No, when I looked at it it was set to UNKNOWN. Unfortunately it doesn't
>>> trigger if I stick breakpoints in or run under rr.
>>>
>>
>> By using rr record --chaos mode maybe?
> 
> Ahh yes that triggered it.
> 
> It looks like we have a race:
> 
>    (rr) c
>    Continuing.
> 
>    Thread 1 hit Hardware watchpoint 7: *(int *) 0x5559ba6899b0
> 
>    Old value = -1
>    New value = 10
>    machvirt_init (machine=0x5559b9ac3c00) at ../../hw/arm/virt.c:2214
>    2214            numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj),
>    (rr) c
>    Continuing.
>    [New Thread 650125.650142]
>    [Switching to Thread 650125.650142]
> 
>    Thread 44 hit Breakpoint 8, mttcg_cpu_thread_fn (arg=0x5559ba6896e0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:70
>    70          assert(tcg_enabled());
>    (rr) p cpu->cpu_index
>    $24 = 10
>    (rr) c
>    Continuing.
>    [Switching to Thread 650125.650125]
> 
>    Thread 1 hit Breakpoint 10, cpu_common_realizefn (dev=0x5559ba6896e0, errp=0x7fff02322c10) at ../../hw/core/cpu-common.c:205
>    205         Object *machine = qdev_get_machine();
>    (rr) p cpu->cpu_index
>    $25 = 10
>    (rr) bt
>    #0  cpu_common_realizefn (dev=0x5559ba6896e0, errp=0x7fff02322c10) at ../../hw/core/cpu-common.c:205
>    #1  0x00005559b606e6a5 in arm_cpu_realizefn (dev=0x5559ba6896e0, errp=0x7fff02322c10) at ../../target/arm/cpu.c:2552
>    #2  0x00005559b63dd5d0 in device_set_realized (obj=0x5559ba6896e0, value=true, errp=0x7fff02322d20) at ../../hw/core/qdev.c:510
>    #3  0x00005559b63e84e7 in property_set_bool (obj=0x5559ba6896e0, v=0x5559ba681540, name=0x5559b68fafe1 "realized", opaque=0x5559b97f73c0, errp=0x7fff02322d20)
>        at ../../qom/object.c:2354
>    #4  0x00005559b63e6065 in object_property_set (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", v=0x5559ba681540, errp=0x7fff02322d20) at ../../qom/object.c:1463
>    #5  0x00005559b63ead1d in object_property_set_qobject (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", value=0x5559ba681420, errp=0x5559b7810320 <error_fatal>)
>        at ../../qom/qom-qobject.c:28
>    #6  0x00005559b63e640a in object_property_set_bool (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", value=true, errp=0x5559b7810320 <error_fatal>)
>        at ../../qom/object.c:1533
>    #7  0x00005559b63dccbd in qdev_realize (dev=0x5559ba6896e0, bus=0x0, errp=0x5559b7810320 <error_fatal>) at ../../hw/core/qdev.c:291
>    #8  0x00005559b5fe4243 in machvirt_init (machine=0x5559b9ac3c00) at ../../hw/arm/virt.c:2295
>    #9  0x00005559b5ada5d6 in machine_run_board_init (machine=0x5559b9ac3c00, mem_path=0x0, errp=0x7fff02322f30) at ../../hw/core/machine.c:1576
>    #10 0x00005559b5eb416f in qemu_init_board () at ../../system/vl.c:2621
>    #11 0x00005559b5eb4482 in qmp_x_exit_preconfig (errp=0x5559b7810320 <error_fatal>) at ../../system/vl.c:2713
>    #12 0x00005559b5eb7088 in qemu_init (argc=35, argv=0x7fff02323268) at ../../system/vl.c:3759
>    #13 0x00005559b63d7ff2 in main (argc=35, argv=0x7fff02323268) at ../../system/main.c:47
> 
> Because looking at the order things happen in the specific CPU realisation:
> 
>      qemu_init_vcpu(cs);
>      cpu_reset(cs);
> 
>      acc->parent_realize(dev, errp);
> 
> And the common realize function doesn't get to do:
> 
>      /* Plugin initialization must wait until the cpu start executing code */
> #ifdef CONFIG_PLUGIN
>      if (tcg_enabled()) {
>          cpu->plugin_state = qemu_plugin_create_vcpu_state();
>          async_run_on_cpu(cpu, qemu_plugin_vcpu_init__async, RUN_ON_CPU_NULL);
>      }
> #endif
> 
> before we've started executing.....
> 

Thanks for the analysis, Alex.
It seems to me this integrates the recent series from Philippe.
Maybe the race was introduced there.
Could you try without that series?
Alex Bennée May 29, 2024, 3:39 p.m. UTC | #7
Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:

> On 5/29/24 05:13, Alex Bennée wrote:
>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>> (Added Philip to CC)
>> 
>>> On 5/28/24 12:57, Alex Bennée wrote:
>>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>>>
>>>>> On 5/28/24 12:14, Alex Bennée wrote:
>>>>>> Pierrick Bouvier <pierrick.bouvier@linaro.org> writes:
>>>>>>
>>>>>>> This plugin uses the new time control interface to make decisions
>>>>>>> about the state of time during the emulation. The algorithm is
>>>>>>> currently very simple. The user specifies an ips rate which applies
>>>>>>> per core.
>> <snip>
>> 
>>>>>>> +static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>>>> +{
>>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>> +    /* ensure start time is set first */
>>>>>>> +    set_start_time();
>>>>>>> +    /* start counter from absolute time reference */
>>>>>>> +    vcpu->counter = num_insn_during(uptime_ns());
>>>>>>> +    vcpu_set_state(vcpu, EXECUTING);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>>>> +{
>>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>> +    vcpu_set_state(vcpu, IDLE);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
>>>>>>> +{
>>>>>>> +    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>> +    g_assert(vcpu->state == IDLE);
>>>>>> I'm triggering a weird race here:
>>>>>>      (gdb) b vcpu_init
>>>>>>      Breakpoint 1 at 0x7ffff7fa15f7: file /home/alex/lsrc/qemu.git/contrib/plugins/ips.c, line 127.
>>>>>>      (gdb) r
>>>>>>      The program being debugged has been started already.
>>>>>>      Start it from the beginning? (y or n) y
>>>>>>      Starting program:
>>>>>> /home/alex/lsrc/qemu.git/builds/arm.debug/qemu-system-aarch64
>>>>>> -machine type=virt,virtualization=on,pflash0=rom,pflash1=efivars
>>>>>> -cpu cortex-a57 -smp 32 -accel tcg -device
>>>>>> virtio-net-pci,netdev=unet -device virtio-scsi-pci -device
>>>>>> scsi-hd,drive=hd -netdev user,id=unet,hostfwd=tcp::2222-:22
>>>>>> -blockdev
>>>>>> driver=raw,node-name=hd,file.driver=host_device,file.filename=/dev/zen-ssd2/trixie-arm64,discard=unmap
>>>>>> -serial mon:stdio -blockdev
>>>>>> node-name=rom,driver=file,filename=/home/alex/lsrc/qemu.git/builds/arm.debug/pc-bios/edk2-aarch64-code.fd,read-only=true
>>>>>> -blockdev
>>>>>> node-name=efivars,driver=file,filename=/home/alex/images/qemu-arm64-efivars
>>>>>> -m 8192 -object memory-backend-memfd,id=mem,size=8G,share=on -kernel
>>>>>> /home/alex/lsrc/linux.git/builds/arm64/arch/arm64/boot/Image -append
>>>>>> root=/dev/sda2\ console=ttyAMA0 -plugin
>>>>>> contrib/plugins/libips.so,ips=1000000000
>>>>>>      [Thread debugging using libthread_db enabled]
>>>>>>      Using host libthread_db library "/lib/x86_64-linux-gnu/libthread_db.so.1".
>>>>>>      [New Thread 0x7fffe72006c0 (LWP 360538)]
>>>>>>      [New Thread 0x7fffe68006c0 (LWP 360540)]
>>>>>>      [New Thread 0x7fffe5e006c0 (LWP 360541)]
>>>>>>      [New Thread 0x7fffe54006c0 (LWP 360542)]
>>>>>>      [New Thread 0x7fffe4a006c0 (LWP 360543)]
>>>>>>      [New Thread 0x7fffdfe006c0 (LWP 360544)]
>>>>>>      [New Thread 0x7fffdf4006c0 (LWP 360545)]
>>>>>>      [New Thread 0x7fffdea006c0 (LWP 360546)]
>>>>>>      [Switching to Thread 0x7fffdf4006c0 (LWP 360545)]
>>>>>>      Thread 8 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>>> (id=10457908569352202058, cpu_index=0) at
>>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>      (gdb) c
>>>>>>      Continuing.
>>>>>>      [New Thread 0x7fffde0006c0 (LWP 360548)]
>>>>>>      [Switching to Thread 0x7fffdea006c0 (LWP 360546)]
>>>>>>      Thread 9 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>>> (id=10457908569352202058, cpu_index=1) at
>>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>      (gdb)
>>>>>>      Continuing.
>>>>>>      [New Thread 0x7fffdd6006c0 (LWP 360549)]
>>>>>>      [Switching to Thread 0x7fffde0006c0 (LWP 360548)]
>>>>>>      Thread 10 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>>> (id=10457908569352202058, cpu_index=2) at
>>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>      (gdb)
>>>>>>      Continuing.
>>>>>>      [New Thread 0x7fffdcc006c0 (LWP 360550)]
>>>>>>      [Switching to Thread 0x7fffdd6006c0 (LWP 360549)]
>>>>>>      Thread 11 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>>> (id=10457908569352202058, cpu_index=3) at
>>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>      (gdb)
>>>>>>      Continuing.
>>>>>>      [New Thread 0x7fffd3e006c0 (LWP 360551)]
>>>>>>      [Switching to Thread 0x7fffdcc006c0 (LWP 360550)]
>>>>>>      Thread 12 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>>> (id=10457908569352202058, cpu_index=4) at
>>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>      (gdb) n
>>>>>>      129         set_start_time();
>>>>>>      (gdb)
>>>>>>      131         vcpu->counter = num_insn_during(uptime_ns());
>>>>>>      (gdb)
>>>>>>      132         vcpu_set_state(vcpu, EXECUTING);
>>>>>>      (gdb)
>>>>>>      133     }
>>>>>>      (gdb) p vcpu->state
>>>>>>      $1 = EXECUTING
>>>>>>      (gdb) p &vcpu->state
>>>>>>      $2 = (vCPUState *) 0x555557c6b5d0
>>>>>>      (gdb) watch *(vCPUState *) 0x555557c6b5d0
>>>>>>      Hardware watchpoint 2: *(vCPUState *) 0x555557c6b5d0
>>>>>>      (gdb) c
>>>>>>      Continuing.
>>>>>>      [Thread 0x7fffdfe006c0 (LWP 360544) exited]
>>>>>>      [Thread 0x7fffe5e006c0 (LWP 360541) exited]
>>>>>>      Thread 12 "qemu-system-aar" hit Hardware watchpoint 2:
>>>>>> *(vCPUState *) 0x555557c6b5d0
>>>>>>      Old value = EXECUTING
>>>>>>      New value = IDLE
>>>>>>      vcpu_set_state (vcpu=0x555557c6b5c0, new_state=IDLE) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:83
>>>>>>      83      }
>>>>>>      (gdb) c
>>>>>>      Continuing.
>>>>>>      [New Thread 0x7fffdfe006c0 (LWP 360591)]
>>>>>>      [New Thread 0x7fffe5e006c0 (LWP 360592)]
>>>>>>      [New Thread 0x7fffd34006c0 (LWP 360593)]
>>>>>>      [Switching to Thread 0x7fffdfe006c0 (LWP 360591)]
>>>>>>      Thread 14 "qemu-system-aar" hit Breakpoint 1, vcpu_init
>>>>>> (id=10457908569352202058, cpu_index=6) at
>>>>>> /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:127
>>>>>>      127         vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
>>>>>>      (gdb)
>>>>>>      Continuing.
>>>>>>      [New Thread 0x7fffd2a006c0 (LWP 360594)]
>>>>>>      **
>>>>>>      ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>>>>>      Bail out! ERROR:/home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144:vcpu_resume: assertion failed: (vcpu->state == IDLE)
>>>>>>      Thread 13 "qemu-system-aar" received signal SIGABRT, Aborted.
>>>>>>      [Switching to Thread 0x7fffd3e006c0 (LWP 360551)]
>>>>>>      __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>>>>>      44      ./nptl/pthread_kill.c: No such file or directory.
>>>>>>      (gdb) bt
>>>>>>      #0  __pthread_kill_implementation (threadid=<optimized out>, signo=signo@entry=6, no_tid=no_tid@entry=0) at ./nptl/pthread_kill.c:44
>>>>>>      #1  0x00007ffff4ca9e8f in __pthread_kill_internal (signo=6, threadid=<optimized out>) at ./nptl/pthread_kill.c:78
>>>>>>      #2  0x00007ffff4c5afb2 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
>>>>>>      #3  0x00007ffff4c45472 in __GI_abort () at ./stdlib/abort.c:79
>>>>>>      #4  0x00007ffff6e46ec8 in  () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>>>>>      #5  0x00007ffff6ea6e1a in g_assertion_message_expr () at /lib/x86_64-linux-gnu/libglib-2.0.so.0
>>>>>>      #6  0x00007ffff7fa16e2 in vcpu_resume (id=10457908569352202058, cpu_index=5) at /home/alex/lsrc/qemu.git/contrib/plugins/ips.c:144
>>>>>>      #7  0x00005555562c3202 in plugin_vcpu_cb__simple (cpu=0x5555583d9540, ev=QEMU_PLUGIN_EV_VCPU_RESUME) at ../../plugins/core.c:111
>>>>>>      #8  0x00005555562c43f7 in qemu_plugin_vcpu_resume_cb (cpu=0x5555583d9540) at ../../plugins/core.c:523
>>>>>>      #9  0x0000555555d9150e in qemu_wait_io_event (cpu=0x5555583d9540) at ../../system/cpus.c:465
>>>>>>      #10 0x00005555562c67d0 in mttcg_cpu_thread_fn (arg=0x5555583d9540) at ../../accel/tcg/tcg-accel-ops-mttcg.c:118
>>>>>>      #11 0x00005555565120ff in qemu_thread_start (args=0x5555584716e0) at ../../util/qemu-thread-posix.c:541
>>>>>>      #12 0x00007ffff4ca8134 in start_thread (arg=<optimized out>) at ./nptl/pthread_create.c:442
>>>>>>      #13 0x00007ffff4d287dc in clone3 () at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:81
>>>>>>      (gdb)
>>>>>> But I don't understand how we can ever hit the idle callback without
>>>>>> first hitting the init callback.
>>>>>>
>>>>>
>>>>> More exactly, the assert we hit means that the idle callback was not
>>>>> called before the resume callback.
>>>>> Any chance you can check what the current vcpu->state value is? I
>>>>> wonder if it's an exited cpu that gets resumed afterwards.
>>>> No, when I looked at it it was set to UNKNOWN. Unfortunately it doesn't
>>>> trigger if I stick breakpoints in or run under rr.
>>>>
>>>
>>> By using rr record --chaos mode maybe?
>> Ahh yes that triggered it.
>> It looks like we have a race:
>>    (rr) c
>>    Continuing.
>>    Thread 1 hit Hardware watchpoint 7: *(int *) 0x5559ba6899b0
>>    Old value = -1
>>    New value = 10
>>    machvirt_init (machine=0x5559b9ac3c00) at ../../hw/arm/virt.c:2214
>>    2214            numa_cpu_pre_plug(&possible_cpus->cpus[cs->cpu_index], DEVICE(cpuobj),
>>    (rr) c
>>    Continuing.
>>    [New Thread 650125.650142]
>>    [Switching to Thread 650125.650142]
>>    Thread 44 hit Breakpoint 8, mttcg_cpu_thread_fn
>> (arg=0x5559ba6896e0) at ../../accel/tcg/tcg-accel-ops-mttcg.c:70
>>    70          assert(tcg_enabled());
>>    (rr) p cpu->cpu_index
>>    $24 = 10
>>    (rr) c
>>    Continuing.
>>    [Switching to Thread 650125.650125]
>>    Thread 1 hit Breakpoint 10, cpu_common_realizefn
>> (dev=0x5559ba6896e0, errp=0x7fff02322c10) at
>> ../../hw/core/cpu-common.c:205
>>    205         Object *machine = qdev_get_machine();
>>    (rr) p cpu->cpu_index
>>    $25 = 10
>>    (rr) bt
>>    #0  cpu_common_realizefn (dev=0x5559ba6896e0, errp=0x7fff02322c10) at ../../hw/core/cpu-common.c:205
>>    #1  0x00005559b606e6a5 in arm_cpu_realizefn (dev=0x5559ba6896e0, errp=0x7fff02322c10) at ../../target/arm/cpu.c:2552
>>    #2  0x00005559b63dd5d0 in device_set_realized (obj=0x5559ba6896e0, value=true, errp=0x7fff02322d20) at ../../hw/core/qdev.c:510
>>    #3  0x00005559b63e84e7 in property_set_bool (obj=0x5559ba6896e0, v=0x5559ba681540, name=0x5559b68fafe1 "realized", opaque=0x5559b97f73c0, errp=0x7fff02322d20)
>>        at ../../qom/object.c:2354
>>    #4  0x00005559b63e6065 in object_property_set (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", v=0x5559ba681540, errp=0x7fff02322d20) at ../../qom/object.c:1463
>>    #5  0x00005559b63ead1d in object_property_set_qobject (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", value=0x5559ba681420, errp=0x5559b7810320 <error_fatal>)
>>        at ../../qom/qom-qobject.c:28
>>    #6  0x00005559b63e640a in object_property_set_bool (obj=0x5559ba6896e0, name=0x5559b68fafe1 "realized", value=true, errp=0x5559b7810320 <error_fatal>)
>>        at ../../qom/object.c:1533
>>    #7  0x00005559b63dccbd in qdev_realize (dev=0x5559ba6896e0, bus=0x0, errp=0x5559b7810320 <error_fatal>) at ../../hw/core/qdev.c:291
>>    #8  0x00005559b5fe4243 in machvirt_init (machine=0x5559b9ac3c00) at ../../hw/arm/virt.c:2295
>>    #9  0x00005559b5ada5d6 in machine_run_board_init (machine=0x5559b9ac3c00, mem_path=0x0, errp=0x7fff02322f30) at ../../hw/core/machine.c:1576
>>    #10 0x00005559b5eb416f in qemu_init_board () at ../../system/vl.c:2621
>>    #11 0x00005559b5eb4482 in qmp_x_exit_preconfig (errp=0x5559b7810320 <error_fatal>) at ../../system/vl.c:2713
>>    #12 0x00005559b5eb7088 in qemu_init (argc=35, argv=0x7fff02323268) at ../../system/vl.c:3759
>>    #13 0x00005559b63d7ff2 in main (argc=35, argv=0x7fff02323268) at ../../system/main.c:47
>> Because looking at the order things happen in the specific CPU
>> realisation:
>>      qemu_init_vcpu(cs);
>>      cpu_reset(cs);
>>      acc->parent_realize(dev, errp);
>> And the common realize function doesn't get to do:
>>      /* Plugin initialization must wait until the cpu start
>> executing code */
>> #ifdef CONFIG_PLUGIN
>>      if (tcg_enabled()) {
>>          cpu->plugin_state = qemu_plugin_create_vcpu_state();
>>          async_run_on_cpu(cpu, qemu_plugin_vcpu_init__async, RUN_ON_CPU_NULL);
>>      }
>> #endif
>> before we've started executing.....
>> 
>
> Thanks for the analysis, Alex.
> It seems to me this integrates the recent series from Philippe.
> Maybe the race was introduced there.
> Could you try without that series?

No, this is based off master and I think the race has always been there.
I think I was just triggering it because I was using -smp 32 for some big
system simulations. See:

  Message-Id: <20240529152219.825680-1-alex.bennee@linaro.org>
  Date: Wed, 29 May 2024 16:22:19 +0100
  Subject: [RFC PATCH] cpus: split qemu_init_vcpu and delay vCPU thread creation
  From: Alex Bennée <alex.bennee@linaro.org>
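
For what it's worth, the ordering problem reduces to a tiny toy (hypothetical
code, nothing to do with the real QEMU paths): if the worker thread can run
before its state is initialised it may observe UNKNOWN, and starting the
thread last removes that window.

    /* toy model of the init/resume ordering, not QEMU code: cc -pthread toy.c */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>

    enum vstate { UNKNOWN = 0, EXECUTING };
    static _Atomic enum vstate vcpu_state = UNKNOWN;

    /* stands in for the vCPU thread delivering an early resume callback */
    static void *vcpu_thread_fn(void *arg)
    {
        (void)arg;
        if (vcpu_state == UNKNOWN) {
            printf("resume-like event before init: would trip the assert\n");
        }
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        /* today's ordering: thread first, "plugin vcpu_init" deferred */
        pthread_create(&t, NULL, vcpu_thread_fn, NULL);
        vcpu_state = EXECUTING;
        pthread_join(t, NULL);

        /* proposed ordering: state is initialised before the thread exists */
        pthread_create(&t, NULL, vcpu_thread_fn, NULL);
        pthread_join(t, NULL);
        return 0;
    }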
diff mbox series

Patch

diff --git a/contrib/plugins/ips.c b/contrib/plugins/ips.c
new file mode 100644
index 00000000000..cf3159df391
--- /dev/null
+++ b/contrib/plugins/ips.c
@@ -0,0 +1,239 @@ 
+/*
+ * ips rate limiting plugin.
+ *
+ * This plugin can be used to restrict the execution of a system to a
+ * particular number of Instructions Per Second (ips). This controls
+ * time as seen by the guest, so while wall-clock time may be longer,
+ * from the guest's point of view time will pass at the normal rate.
+ *
+ * This uses the new plugin API which allows the plugin to control
+ * system time.
+ *
+ * Copyright (c) 2023 Linaro Ltd
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <stdio.h>
+#include <glib.h>
+#include <qemu-plugin.h>
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
+
+/* how many times do we update time per sec */
+#define NUM_TIME_UPDATE_PER_SEC 10
+#define NSEC_IN_ONE_SEC (1000 * 1000 * 1000)
+
+static GMutex global_state_lock;
+
+static uint64_t insn_per_second = 1000 * 1000; /* default ips per core */
+static uint64_t insn_quantum; /* trap every N instructions */
+static bool precise_execution; /* count every instruction */
+static int64_t start_time_ns; /* time (ns since epoch) first vCPU started */
+static int64_t virtual_time_ns; /* last set virtual time */
+
+static const void *time_handle;
+
+typedef enum {
+    UNKNOWN = 0,
+    EXECUTING,
+    IDLE,
+    FINISHED
+} vCPUState;
+
+typedef struct {
+    uint64_t counter;
+    uint64_t track_insn;
+    vCPUState state;
+    /* timestamp when vCPU entered state */
+    int64_t last_state_time;
+} vCPUTime;
+
+struct qemu_plugin_scoreboard *vcpus;
+
+/* return epoch time in ns */
+static int64_t now_ns(void)
+{
+    return g_get_real_time() * 1000;
+}
+
+static uint64_t num_insn_during(int64_t elapsed_ns)
+{
+    double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
+    return num_secs * (double) insn_per_second;
+}
+
+static int64_t time_for_insn(uint64_t num_insn)
+{
+    double num_secs = (double) num_insn / (double) insn_per_second;
+    return num_secs * (double) NSEC_IN_ONE_SEC;
+}
+
+static int64_t uptime_ns(void)
+{
+    int64_t now = now_ns();
+    g_assert(now >= start_time_ns);
+    return now - start_time_ns;
+}
+
+static void vcpu_set_state(vCPUTime *vcpu, vCPUState new_state)
+{
+    vcpu->last_state_time = now_ns();
+    vcpu->state = new_state;
+}
+
+static void update_system_time(vCPUTime *vcpu)
+{
+    /* flush remaining instructions */
+    vcpu->counter += vcpu->track_insn;
+    vcpu->track_insn = 0;
+
+    int64_t uptime = uptime_ns();
+    uint64_t expected_insn = num_insn_during(uptime);
+
+    if (vcpu->counter >= expected_insn) {
+        /* this vcpu ran faster than expected, so it has to sleep */
+        uint64_t insn_advance = vcpu->counter - expected_insn;
+        uint64_t time_advance_ns = time_for_insn(insn_advance);
+        int64_t sleep_us = time_advance_ns / 1000;
+        g_usleep(sleep_us);
+    }
+
+    /* based on number of instructions, what should be the new time? */
+    int64_t new_virtual_time = time_for_insn(vcpu->counter);
+
+    g_mutex_lock(&global_state_lock);
+
+    /* Time only moves forward. Another vcpu might have updated it already. */
+    if (new_virtual_time > virtual_time_ns) {
+        qemu_plugin_update_ns(time_handle, new_virtual_time);
+        virtual_time_ns = new_virtual_time;
+    }
+
+    g_mutex_unlock(&global_state_lock);
+}
+
+static void set_start_time(void)
+{
+    g_mutex_lock(&global_state_lock);
+    if (!start_time_ns) {
+        start_time_ns = now_ns();
+    }
+    g_mutex_unlock(&global_state_lock);
+}
+
+static void vcpu_init(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+    /* ensure start time is set first */
+    set_start_time();
+    /* start counter from absolute time reference */
+    vcpu->counter = num_insn_during(uptime_ns());
+    vcpu_set_state(vcpu, EXECUTING);
+}
+
+static void vcpu_idle(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+    vcpu_set_state(vcpu, IDLE);
+}
+
+static void vcpu_resume(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+    g_assert(vcpu->state == IDLE);
+    int64_t idle_time = now_ns() - vcpu->last_state_time;
+    /* accumulate expected number of instructions */
+    vcpu->counter += num_insn_during(idle_time);
+    vcpu_set_state(vcpu, EXECUTING);
+}
+
+static void vcpu_exit(qemu_plugin_id_t id, unsigned int cpu_index)
+{
+    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+    vcpu_set_state(vcpu, FINISHED);
+    update_system_time(vcpu);
+    vcpu->counter = 0;
+}
+
+static void every_insn_quantum(unsigned int cpu_index, void *udata)
+{
+    vCPUTime *vcpu = qemu_plugin_scoreboard_find(vcpus, cpu_index);
+    g_assert(vcpu->track_insn >= insn_quantum);
+    update_system_time(vcpu);
+}
+
+static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
+{
+    size_t n_insns = qemu_plugin_tb_n_insns(tb);
+    qemu_plugin_u64 track_insn =
+        qemu_plugin_scoreboard_u64_in_struct(vcpus, vCPUTime, track_insn);
+    if (precise_execution) {
+        /* count (and possibly trap) on every instruction */
+        for (size_t idx = 0; idx < n_insns; ++idx) {
+            struct qemu_plugin_insn *insn = qemu_plugin_tb_get_insn(tb, idx);
+            qemu_plugin_register_vcpu_insn_exec_inline_per_vcpu(
+                insn, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, 1);
+            qemu_plugin_register_vcpu_insn_exec_cond_cb(
+                insn, every_insn_quantum,
+                QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
+                track_insn, insn_quantum, NULL);
+        }
+    } else {
+        /* count (and possibly trap) once per tb */
+        qemu_plugin_register_vcpu_tb_exec_inline_per_vcpu(
+            tb, QEMU_PLUGIN_INLINE_ADD_U64, track_insn, n_insns);
+        qemu_plugin_register_vcpu_tb_exec_cond_cb(
+            tb, every_insn_quantum,
+            QEMU_PLUGIN_CB_NO_REGS, QEMU_PLUGIN_COND_GE,
+            track_insn, insn_quantum, NULL);
+    }
+}
+
+static void plugin_exit(qemu_plugin_id_t id, void *udata)
+{
+    qemu_plugin_scoreboard_free(vcpus);
+}
+
+QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
+                                           const qemu_info_t *info, int argc,
+                                           char **argv)
+{
+    for (int i = 0; i < argc; i++) {
+        char *opt = argv[i];
+        g_auto(GStrv) tokens = g_strsplit(opt, "=", 2);
+        if (g_strcmp0(tokens[0], "ips") == 0) {
+            insn_per_second = g_ascii_strtoull(tokens[1], NULL, 10);
+            if (!insn_per_second && errno) {
+                fprintf(stderr, "%s: couldn't parse %s (%s)\n",
+                        __func__, tokens[1], g_strerror(errno));
+                return -1;
+            }
+
+        } else if (g_strcmp0(tokens[0], "precise") == 0) {
+            if (!qemu_plugin_bool_parse(tokens[0], tokens[1],
+                                        &precise_execution)) {
+                fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+                return -1;
+            }
+        } else {
+            fprintf(stderr, "option parsing failed: %s\n", opt);
+            return -1;
+        }
+    }
+
+    vcpus = qemu_plugin_scoreboard_new(sizeof(vCPUTime));
+    insn_quantum = insn_per_second / NUM_TIME_UPDATE_PER_SEC;
+
+    time_handle = qemu_plugin_request_time_control();
+    g_assert(time_handle);
+
+    qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
+    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
+    qemu_plugin_register_vcpu_idle_cb(id, vcpu_idle);
+    qemu_plugin_register_vcpu_resume_cb(id, vcpu_resume);
+    qemu_plugin_register_vcpu_exit_cb(id, vcpu_exit);
+    qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
+
+    return 0;
+}
diff --git a/contrib/plugins/Makefile b/contrib/plugins/Makefile
index 0b64d2c1e3a..449ead11305 100644
--- a/contrib/plugins/Makefile
+++ b/contrib/plugins/Makefile
@@ -27,6 +27,7 @@  endif
 NAMES += hwprofile
 NAMES += cache
 NAMES += drcov
+NAMES += ips
 
 ifeq ($(CONFIG_WIN32),y)
 SO_SUFFIX := .dll
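
For reference, the quantum and virtual-time bookkeeping in ips.c can be
sanity-checked outside QEMU. The sketch below (not part of the patch) reuses
the num_insn_during() and time_for_insn() formulas with the plugin's defaults:
ips=1,000,000 and ten time updates per second, which give an insn_quantum of
100,000 instructions and 100 ms of virtual time per quantum.

  /*
   * Standalone check of the ips.c time arithmetic (not part of the patch).
   * Build: cc -o ips-math ips-math.c
   */
  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  #define NUM_TIME_UPDATE_PER_SEC 10
  #define NSEC_IN_ONE_SEC (1000LL * 1000 * 1000)

  static uint64_t insn_per_second = 1000 * 1000; /* plugin default */

  /* instructions expected after elapsed_ns of real time */
  static uint64_t num_insn_during(int64_t elapsed_ns)
  {
      double num_secs = elapsed_ns / (double) NSEC_IN_ONE_SEC;
      return num_secs * (double) insn_per_second;
  }

  /* virtual time represented by num_insn instructions */
  static int64_t time_for_insn(uint64_t num_insn)
  {
      double num_secs = (double) num_insn / (double) insn_per_second;
      return num_secs * (double) NSEC_IN_ONE_SEC;
  }

  int main(void)
  {
      uint64_t insn_quantum = insn_per_second / NUM_TIME_UPDATE_PER_SEC;

      printf("quantum: %" PRIu64 " instructions\n", insn_quantum);
      printf("virtual time per quantum: %" PRId64 " ns\n",
             time_for_insn(insn_quantum));
      printf("expected instructions after 1s: %" PRIu64 "\n",
             num_insn_during(NSEC_IN_ONE_SEC));
      return 0;
  }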