diff mbox series

[v3,19/21] contrib/plugins: extend execlog to track register changes

Message ID 20240122145610.413836-20-alex.bennee@linaro.org
State Superseded
Headers show
Series plugin updates (register access) for 9.0 (pre-PR?) | expand

Commit Message

Alex Bennée Jan. 22, 2024, 2:56 p.m. UTC
With the new plugin register API we can now track changes to register
values. Currently the implementation is fairly dumb which will slow
down if a large number of register values are being tracked. This
could be improved by only instrumenting instructions which mention
registers we are interested in tracking.

Example usage:

  ./qemu-aarch64 -D plugin.log -d plugin \
     -cpu max,sve256=on \
     -plugin contrib/plugins/libexeclog.so,reg=sp,reg=z\* \
     ./tests/tcg/aarch64-linux-user/sha512-sve

will display in the execlog any changes to the stack pointer (sp) and
the SVE Z registers.

Message-Id: <20240103173349.398526-41-alex.bennee@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
Based-On: <20231025093128.33116-19-akihiko.odaki@daynix.com>

---
v3
  - just use a GArray for the CPU array
  - drop duplicate of cpu_index
---
 docs/devel/tcg-plugins.rst |  17 +-
 contrib/plugins/execlog.c  | 317 +++++++++++++++++++++++++++++++------
 2 files changed, 282 insertions(+), 52 deletions(-)

Comments

Pierrick Bouvier Jan. 24, 2024, 5:54 a.m. UTC | #1
On 1/22/24 18:56, Alex Bennée wrote:
> With the new plugin register API we can now track changes to register
> values. Currently the implementation is fairly dumb which will slow
> down if a large number of register values are being tracked. This
> could be improved by only instrumenting instructions which mention
> registers we are interested in tracking.
> 
> Example usage:
> 
>    ./qemu-aarch64 -D plugin.log -d plugin \
>       -cpu max,sve256=on \
>       -plugin contrib/plugins/libexeclog.so,reg=sp,reg=z\* \
>       ./tests/tcg/aarch64-linux-user/sha512-sve
> 
> will display in the execlog any changes to the stack pointer (sp) and
> the SVE Z registers.
> 
> Message-Id: <20240103173349.398526-41-alex.bennee@linaro.org>
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> Cc: Akihiko Odaki <akihiko.odaki@daynix.com>
> Based-On: <20231025093128.33116-19-akihiko.odaki@daynix.com>
> 
> ---
> v3
>    - just use a GArray for the CPU array
>    - drop duplicate of cpu_index
> ---
>   docs/devel/tcg-plugins.rst |  17 +-
>   contrib/plugins/execlog.c  | 317 +++++++++++++++++++++++++++++++------
>   2 files changed, 282 insertions(+), 52 deletions(-)
> 
> diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
> index 81dcd43a612..fa7421279f5 100644
> --- a/docs/devel/tcg-plugins.rst
> +++ b/docs/devel/tcg-plugins.rst
> @@ -497,6 +497,22 @@ arguments if required::
>     $ qemu-system-arm $(QEMU_ARGS) \
>       -plugin ./contrib/plugins/libexeclog.so,ifilter=st1w,afilter=0x40001808 -d plugin
>   
> +This plugin can also dump registers when they change value. Specify the name of the
> +registers with multiple ``reg`` options. You can also use glob style matching if you wish::
> +
> +  $ qemu-system-arm $(QEMU_ARGS) \
> +    -plugin ./contrib/plugins/libexeclog.so,reg=\*_el2,reg=sp -d plugin
> +
> +Be aware that each additional register to check will slow down
> +execution quite considerably. You can optimise the number of register
> +checks done by using the rdisas option. This will only instrument
> +instructions that mention the registers in question in disassembly.
> +This is not foolproof as some instructions implicitly change
> +instructions. You can use the ifilter to catch these cases:
> +
> +  $ qemu-system-arm $(QEMU_ARGS) \
> +    -plugin ./contrib/plugins/libexeclog.so,ifilter=msr,ifilter=blr,reg=x30,reg=\*_el1,rdisas=on
> +
>   - contrib/plugins/cache.c
>   
>   Cache modelling plugin that measures the performance of a given L1 cache
> @@ -583,4 +599,3 @@ The following API is generated from the inline documentation in
>   include the full kernel-doc annotations.
>   
>   .. kernel-doc:: include/qemu/qemu-plugin.h
> -
> diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
> index f262e5555eb..c26664c0ab3 100644
> --- a/contrib/plugins/execlog.c
> +++ b/contrib/plugins/execlog.c
> @@ -1,7 +1,7 @@
>   /*
>    * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org>
>    *
> - * Log instruction execution with memory access.
> + * Log instruction execution with memory access and register changes
>    *
>    * License: GNU GPL, version 2 or later.
>    *   See the COPYING file in the top-level directory.
> @@ -15,29 +15,40 @@
>   
>   #include <qemu-plugin.h>
>   
> +typedef struct {
> +    struct qemu_plugin_register *handle;
> +    GByteArray *last;
> +    GByteArray *new;
> +    const char *name;
> +} Register;
> +
> +typedef struct CPU {
> +    /* Store last executed instruction on each vCPU as a GString */
> +    GString *last_exec;
> +    /* Ptr array of Register */
> +    GPtrArray *registers;
> +} CPU;
> +
>   QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
>   
> -/* Store last executed instruction on each vCPU as a GString */
> -static GPtrArray *last_exec;
> +static GArray *cpus;
>   static GRWLock expand_array_lock;
>   
>   static GPtrArray *imatches;
>   static GArray *amatches;
> +static GPtrArray *rmatches;
> +static bool disas_assist;
> +static GMutex add_reg_name_lock;
> +static GPtrArray *all_reg_names;
>   
> -/*
> - * Expand last_exec array.
> - *
> - * As we could have multiple threads trying to do this we need to
> - * serialise the expansion under a lock.
> - */
> -static void expand_last_exec(int cpu_index)
> +static CPU *get_cpu(int vcpu_index)
>   {
> -    g_rw_lock_writer_lock(&expand_array_lock);
> -    while (cpu_index >= last_exec->len) {
> -        GString *s = g_string_new(NULL);
> -        g_ptr_array_add(last_exec, s);
> -    }
> -    g_rw_lock_writer_unlock(&expand_array_lock);
> +    CPU *c;
> +    g_rw_lock_reader_lock(&expand_array_lock);
> +    c = &g_array_index(cpus, CPU, vcpu_index);
> +    g_rw_lock_reader_unlock(&expand_array_lock);
> +
> +    return c;
>   }
>   
>   /**
> @@ -46,13 +57,10 @@ static void expand_last_exec(int cpu_index)
>   static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
>                        uint64_t vaddr, void *udata)
>   {
> -    GString *s;
> +    CPU *c = get_cpu(cpu_index);
> +    GString *s = c->last_exec;
>   
>       /* Find vCPU in array */
> -    g_rw_lock_reader_lock(&expand_array_lock);
> -    g_assert(cpu_index < last_exec->len);
> -    s = g_ptr_array_index(last_exec, cpu_index);
> -    g_rw_lock_reader_unlock(&expand_array_lock);
>   
>       /* Indicate type of memory access */
>       if (qemu_plugin_mem_is_store(info)) {
> @@ -73,32 +81,91 @@ static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
>   }
>   
>   /**
> - * Log instruction execution
> + * Log instruction execution, outputting the last one.
> + *
> + * vcpu_insn_exec() is a copy and paste of vcpu_insn_exec_with_regs()
> + * without the checking of register values when we've attempted to
> + * optimise with disas_assist.
>    */
> -static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
> +static void insn_check_regs(int vcpu_index, CPU *cpu)
>   {
> -    GString *s;
> +    for (int n = 0; n < cpu->registers->len; n++) {
> +        Register *reg = cpu->registers->pdata[n];
> +        int sz;
>   
> -    /* Find or create vCPU in array */
> -    g_rw_lock_reader_lock(&expand_array_lock);
> -    if (cpu_index >= last_exec->len) {
> -        g_rw_lock_reader_unlock(&expand_array_lock);
> -        expand_last_exec(cpu_index);
> -        g_rw_lock_reader_lock(&expand_array_lock);
> +        g_byte_array_set_size(reg->new, 0);
> +        sz = qemu_plugin_read_register(vcpu_index, reg->handle, reg->new);
> +        g_assert(sz == reg->last->len);
> +
> +        if (memcmp(reg->last->data, reg->new->data, sz)) {
> +            GByteArray *temp = reg->last;
> +            g_string_append_printf(cpu->last_exec, ", %s -> 0x", reg->name);
> +            /* TODO: handle BE properly */
> +            for (int i = sz; i >= 0; i--) {
> +                g_string_append_printf(cpu->last_exec, "%02x",
> +                                       reg->new->data[i]);
> +            }
> +            reg->last = reg->new;
> +            reg->new = temp;
> +        }
> +    }
> +}
> +
> +/* Log last instruction while checking registers */
> +static void vcpu_insn_exec_with_regs(unsigned int cpu_index, void *udata)
> +{
> +    CPU *cpu = get_cpu(cpu_index);
> +
> +    /* Print previous instruction in cache */
> +    if (cpu->last_exec->len) {
> +        if (cpu->registers) {
> +            insn_check_regs(cpu_index, cpu);
> +        }
> +
> +        qemu_plugin_outs(cpu->last_exec->str);
> +        qemu_plugin_outs("\n");
> +    }
> +
> +    /* Store new instruction in cache */
> +    /* vcpu_mem will add memory access information to last_exec */
> +    g_string_printf(cpu->last_exec, "%u, ", cpu_index);
> +    g_string_append(cpu->last_exec, (char *)udata);
> +}
> +
> +/* Log last instruction while checking registers, ignore next */
> +static void vcpu_insn_exec_only_regs(unsigned int cpu_index, void *udata)
> +{
> +    CPU *cpu = get_cpu(cpu_index);
> +
> +    /* Print previous instruction in cache */
> +    if (cpu->last_exec->len) {
> +        if (cpu->registers) {
> +            insn_check_regs(cpu_index, cpu);
> +        }
> +
> +        qemu_plugin_outs(cpu->last_exec->str);
> +        qemu_plugin_outs("\n");
>       }
> -    s = g_ptr_array_index(last_exec, cpu_index);
> -    g_rw_lock_reader_unlock(&expand_array_lock);
> +
> +    /* reset */
> +    cpu->last_exec->len = 0;
> +}
> +
> +/* Log last instruction without checking regs, setup next */
> +static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
> +{
> +    CPU *cpu = get_cpu(cpu_index);
>   
>       /* Print previous instruction in cache */
> -    if (s->len) {
> -        qemu_plugin_outs(s->str);
> +    if (cpu->last_exec->len) {
> +        qemu_plugin_outs(cpu->last_exec->str);
>           qemu_plugin_outs("\n");
>       }
>   
>       /* Store new instruction in cache */
>       /* vcpu_mem will add memory access information to last_exec */
> -    g_string_printf(s, "%u, ", cpu_index);
> -    g_string_append(s, (char *)udata);
> +    g_string_printf(cpu->last_exec, "%u, ", cpu_index);
> +    g_string_append(cpu->last_exec, (char *)udata);
>   }
>   
>   /**
> @@ -111,6 +178,8 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>   {
>       struct qemu_plugin_insn *insn;
>       bool skip = (imatches || amatches);
> +    bool check_regs_this = rmatches;
> +    bool check_regs_next = false;
>   
>       size_t n = qemu_plugin_tb_n_insns(tb);
>       for (size_t i = 0; i < n; i++) {
> @@ -131,7 +200,8 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>           /*
>            * If we are filtering we better check out if we have any
>            * hits. The skip "latches" so we can track memory accesses

nit: matches

> -         * after the instruction we care about.
> +         * after the instruction we care about. Also enable register
> +         * checking on the next instruction.
>            */
>           if (skip && imatches) {
>               int j;
> @@ -139,6 +209,7 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>                   char *m = g_ptr_array_index(imatches, j);
>                   if (g_str_has_prefix(insn_disas, m)) {
>                       skip = false;
> +                    check_regs_next = rmatches;
>                   }
>               }
>           }
> @@ -153,8 +224,39 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>               }
>           }
>   
> +        /*
> +         * Check the disassembly to see if a register we care about
> +         * will be affected by this instruction. This relies on the > +         * dissembler doing something sensible for the registers we

nit: disassembler

> +         * care about.
> +         */
> +        if (disas_assist && rmatches) {
> +            check_regs_next = false;
> +            gchar *args = g_strstr_len(insn_disas, -1, " ");
> +            for (int n = 0; n < all_reg_names->len; n++) {
> +                gchar *reg = g_ptr_array_index(all_reg_names, n);
> +                if (g_strrstr(args, reg)) {
> +                    check_regs_next = true;
> +                    skip = false;
> +                }
> +            }
> +        }
> +
> +        /*
> +         * We now have 3 choices:
> +         *
> +         * - Log insn
> +         * - Log insn while checking registers
> +         * - Don't log this insn but check if last insn changed registers
> +         */
> +
>           if (skip) {
> -            g_free(insn_disas);
> +            if (check_regs_this) {
> +                qemu_plugin_register_vcpu_insn_exec_cb(insn,
> +                                                       vcpu_insn_exec_only_regs,
> +                                                       QEMU_PLUGIN_CB_R_REGS,
> +                                                       NULL);
> +            }
>           } else {
>               uint32_t insn_opcode;
>               insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
> @@ -167,30 +269,125 @@ static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
>                                                QEMU_PLUGIN_MEM_RW, NULL);
>   
>               /* Register callback on instruction */
> -            qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
> -                                                   QEMU_PLUGIN_CB_NO_REGS, output);
> +            if (check_regs_this) {
> +                qemu_plugin_register_vcpu_insn_exec_cb(
> +                    insn, vcpu_insn_exec_with_regs,
> +                    QEMU_PLUGIN_CB_R_REGS,
> +                    output);
> +            } else {
> +                qemu_plugin_register_vcpu_insn_exec_cb(
> +                    insn, vcpu_insn_exec,
> +                    QEMU_PLUGIN_CB_NO_REGS,
> +                    output);
> +            }
>   
>               /* reset skip */
>               skip = (imatches || amatches);
>           }
>   
> +        /* set regs for next */
> +        if (disas_assist && rmatches) {
> +            check_regs_this = check_regs_next;
> +        }
> +
> +        g_free(insn_disas);
>       }
>   }
>   
> +static Register *init_vcpu_register(int vcpu_index,
> +                                    qemu_plugin_reg_descriptor *desc)
> +{
> +    Register *reg = g_new0(Register, 1);
> +    g_autofree gchar *lower = g_utf8_strdown(desc->name, -1);
> +    int r;
> +
> +    reg->handle = desc->handle;
> +    reg->name = g_intern_string(lower);
> +    reg->last = g_byte_array_new();
> +    reg->new = g_byte_array_new();
> +
> +    /* read the initial value */
> +    r = qemu_plugin_read_register(vcpu_index, reg->handle, reg->last);
> +    g_assert(r > 0);
> +    return reg;
> +}
> +
> +static GPtrArray *registers_init(int vcpu_index)
> +{
> +    g_autoptr(GPtrArray) registers = g_ptr_array_new();
> +    g_autoptr(GArray) reg_list = qemu_plugin_get_registers(vcpu_index);
> +
> +    if (rmatches && reg_list && reg_list->len) {
> +        /*
> +         * Go through each register in the complete list and
> +         * see if we want to track it.
> +         */
> +        for (int r = 0; r < reg_list->len; r++) {
> +            qemu_plugin_reg_descriptor *rd = &g_array_index(
> +                reg_list, qemu_plugin_reg_descriptor, r);
> +            for (int p = 0; p < rmatches->len; p++) {
> +                g_autoptr(GPatternSpec) pat = g_pattern_spec_new(rmatches->pdata[p]);
> +                g_autofree gchar *rd_lower = g_utf8_strdown(rd->name, -1);
> +                if (g_pattern_match_string(pat, rd->name) ||
> +                    g_pattern_match_string(pat, rd_lower)) {
> +                    Register *reg = init_vcpu_register(vcpu_index, rd);
> +                    g_ptr_array_add(registers, reg);
> +
> +                    /* we need a list of regnames at TB translation time */
> +                    if (disas_assist) {
> +                        g_mutex_lock(&add_reg_name_lock);
> +                        if (!g_ptr_array_find(all_reg_names, reg->name, NULL)) {
> +                            g_ptr_array_add(all_reg_names, reg->name);
> +                        }
> +                        g_mutex_unlock(&add_reg_name_lock);
> +                    }
> +                }
> +            }
> +        }
> +    }
> +
> +    return registers->len ? g_steal_pointer(&registers) : NULL;
> +}
> +
> +/*
> + * Initialise a new vcpu/thread with:
> + *   - last_exec tracking data
> + *   - list of tracked registers
> + *   - initial value of registers
> + *
> + * As we could have multiple threads trying to do this we need to
> + * serialise the expansion under a lock.
> + */
> +static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
> +{
> +    CPU *c;
> +
> +    g_rw_lock_writer_lock(&expand_array_lock);
> +    if (vcpu_index >= cpus->len) {
> +        g_array_set_size(cpus, vcpu_index + 1);
> +    }
> +    g_rw_lock_writer_unlock(&expand_array_lock);
> +
> +    c = get_cpu(vcpu_index);
> +    c->last_exec = g_string_new(NULL);
> +    c->registers = registers_init(vcpu_index);
> +}
> +
>   /**
>    * On plugin exit, print last instruction in cache
>    */
>   static void plugin_exit(qemu_plugin_id_t id, void *p)
>   {
>       guint i;
> -    GString *s;
> -    for (i = 0; i < last_exec->len; i++) {
> -        s = g_ptr_array_index(last_exec, i);
> -        if (s->str) {
> -            qemu_plugin_outs(s->str);
> +    g_rw_lock_reader_lock(&expand_array_lock);
> +    for (i = 0; i < cpus->len; i++) {
> +        CPU *c = get_cpu(i);
> +        if (c->last_exec && c->last_exec->str) {
> +            qemu_plugin_outs(c->last_exec->str);
>               qemu_plugin_outs("\n");
>           }
>       }
> +    g_rw_lock_reader_unlock(&expand_array_lock);
>   }
>   
>   /* Add a match to the array of matches */
> @@ -212,6 +409,18 @@ static void parse_vaddr_match(char *match)
>       g_array_append_val(amatches, v);
>   }
>   
> +/*
> + * We have to wait until vCPUs are started before we can check the
> + * patterns find anything.
> + */
> +static void add_regpat(char *regpat)
> +{
> +    if (!rmatches) {
> +        rmatches = g_ptr_array_new();
> +    }
> +    g_ptr_array_add(rmatches, g_strdup(regpat));
> +}
> +
>   /**
>    * Install the plugin
>    */
> @@ -223,11 +432,8 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>        * Initialize dynamic array to cache vCPU instruction. In user mode
>        * we don't know the size before emulation.
>        */
> -    if (info->system_emulation) {
> -        last_exec = g_ptr_array_sized_new(info->system.max_vcpus);
> -    } else {
> -        last_exec = g_ptr_array_new();
> -    }
> +    cpus = g_array_sized_new(true, true, sizeof(CPU),
> +                             info->system_emulation ? info->system.max_vcpus : 1);
>   
>       for (int i = 0; i < argc; i++) {
>           char *opt = argv[i];
> @@ -236,13 +442,22 @@ QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
>               parse_insn_match(tokens[1]);
>           } else if (g_strcmp0(tokens[0], "afilter") == 0) {
>               parse_vaddr_match(tokens[1]);
> +        } else if (g_strcmp0(tokens[0], "reg") == 0) {
> +            add_regpat(tokens[1]);
> +        } else if (g_strcmp0(tokens[0], "rdisas") == 0) {
> +            if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &disas_assist)) {
> +                fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
> +                return -1;
> +            }
> +            all_reg_names = g_ptr_array_new();
>           } else {
>               fprintf(stderr, "option parsing failed: %s\n", opt);
>               return -1;
>           }
>       }
>   
> -    /* Register translation block and exit callbacks */
> +    /* Register init, translation block and exit callbacks */
> +    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
>       qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
>       qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);
>

nit presented inline.
In more, you can squash this commit from another series, which fixes 
warnings added by this change: 
(https://lore.kernel.org/qemu-devel/20240118032400.3762658-1-pierrick.bouvier@linaro.org/T/#m5674d8f363d2c9f01a5458080a19ad9934cfc453)

Besides that, LGTM.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
diff mbox series

Patch

diff --git a/docs/devel/tcg-plugins.rst b/docs/devel/tcg-plugins.rst
index 81dcd43a612..fa7421279f5 100644
--- a/docs/devel/tcg-plugins.rst
+++ b/docs/devel/tcg-plugins.rst
@@ -497,6 +497,22 @@  arguments if required::
   $ qemu-system-arm $(QEMU_ARGS) \
     -plugin ./contrib/plugins/libexeclog.so,ifilter=st1w,afilter=0x40001808 -d plugin
 
+This plugin can also dump registers when they change value. Specify the name of the
+registers with multiple ``reg`` options. You can also use glob style matching if you wish::
+
+  $ qemu-system-arm $(QEMU_ARGS) \
+    -plugin ./contrib/plugins/libexeclog.so,reg=\*_el2,reg=sp -d plugin
+
+Be aware that each additional register to check will slow down
+execution quite considerably. You can optimise the number of register
+checks done by using the rdisas option. This will only instrument
+instructions that mention the registers in question in disassembly.
+This is not foolproof as some instructions implicitly change
+instructions. You can use the ifilter to catch these cases:
+
+  $ qemu-system-arm $(QEMU_ARGS) \
+    -plugin ./contrib/plugins/libexeclog.so,ifilter=msr,ifilter=blr,reg=x30,reg=\*_el1,rdisas=on
+
 - contrib/plugins/cache.c
 
 Cache modelling plugin that measures the performance of a given L1 cache
@@ -583,4 +599,3 @@  The following API is generated from the inline documentation in
 include the full kernel-doc annotations.
 
 .. kernel-doc:: include/qemu/qemu-plugin.h
-
diff --git a/contrib/plugins/execlog.c b/contrib/plugins/execlog.c
index f262e5555eb..c26664c0ab3 100644
--- a/contrib/plugins/execlog.c
+++ b/contrib/plugins/execlog.c
@@ -1,7 +1,7 @@ 
 /*
  * Copyright (C) 2021, Alexandre Iooss <erdnaxe@crans.org>
  *
- * Log instruction execution with memory access.
+ * Log instruction execution with memory access and register changes
  *
  * License: GNU GPL, version 2 or later.
  *   See the COPYING file in the top-level directory.
@@ -15,29 +15,40 @@ 
 
 #include <qemu-plugin.h>
 
+typedef struct {
+    struct qemu_plugin_register *handle;
+    GByteArray *last;
+    GByteArray *new;
+    const char *name;
+} Register;
+
+typedef struct CPU {
+    /* Store last executed instruction on each vCPU as a GString */
+    GString *last_exec;
+    /* Ptr array of Register */
+    GPtrArray *registers;
+} CPU;
+
 QEMU_PLUGIN_EXPORT int qemu_plugin_version = QEMU_PLUGIN_VERSION;
 
-/* Store last executed instruction on each vCPU as a GString */
-static GPtrArray *last_exec;
+static GArray *cpus;
 static GRWLock expand_array_lock;
 
 static GPtrArray *imatches;
 static GArray *amatches;
+static GPtrArray *rmatches;
+static bool disas_assist;
+static GMutex add_reg_name_lock;
+static GPtrArray *all_reg_names;
 
-/*
- * Expand last_exec array.
- *
- * As we could have multiple threads trying to do this we need to
- * serialise the expansion under a lock.
- */
-static void expand_last_exec(int cpu_index)
+static CPU *get_cpu(int vcpu_index)
 {
-    g_rw_lock_writer_lock(&expand_array_lock);
-    while (cpu_index >= last_exec->len) {
-        GString *s = g_string_new(NULL);
-        g_ptr_array_add(last_exec, s);
-    }
-    g_rw_lock_writer_unlock(&expand_array_lock);
+    CPU *c;
+    g_rw_lock_reader_lock(&expand_array_lock);
+    c = &g_array_index(cpus, CPU, vcpu_index);
+    g_rw_lock_reader_unlock(&expand_array_lock);
+
+    return c;
 }
 
 /**
@@ -46,13 +57,10 @@  static void expand_last_exec(int cpu_index)
 static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
                      uint64_t vaddr, void *udata)
 {
-    GString *s;
+    CPU *c = get_cpu(cpu_index);
+    GString *s = c->last_exec;
 
     /* Find vCPU in array */
-    g_rw_lock_reader_lock(&expand_array_lock);
-    g_assert(cpu_index < last_exec->len);
-    s = g_ptr_array_index(last_exec, cpu_index);
-    g_rw_lock_reader_unlock(&expand_array_lock);
 
     /* Indicate type of memory access */
     if (qemu_plugin_mem_is_store(info)) {
@@ -73,32 +81,91 @@  static void vcpu_mem(unsigned int cpu_index, qemu_plugin_meminfo_t info,
 }
 
 /**
- * Log instruction execution
+ * Log instruction execution, outputting the last one.
+ *
+ * vcpu_insn_exec() is a copy and paste of vcpu_insn_exec_with_regs()
+ * without the checking of register values when we've attempted to
+ * optimise with disas_assist.
  */
-static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
+static void insn_check_regs(int vcpu_index, CPU *cpu)
 {
-    GString *s;
+    for (int n = 0; n < cpu->registers->len; n++) {
+        Register *reg = cpu->registers->pdata[n];
+        int sz;
 
-    /* Find or create vCPU in array */
-    g_rw_lock_reader_lock(&expand_array_lock);
-    if (cpu_index >= last_exec->len) {
-        g_rw_lock_reader_unlock(&expand_array_lock);
-        expand_last_exec(cpu_index);
-        g_rw_lock_reader_lock(&expand_array_lock);
+        g_byte_array_set_size(reg->new, 0);
+        sz = qemu_plugin_read_register(vcpu_index, reg->handle, reg->new);
+        g_assert(sz == reg->last->len);
+
+        if (memcmp(reg->last->data, reg->new->data, sz)) {
+            GByteArray *temp = reg->last;
+            g_string_append_printf(cpu->last_exec, ", %s -> 0x", reg->name);
+            /* TODO: handle BE properly */
+            for (int i = sz; i >= 0; i--) {
+                g_string_append_printf(cpu->last_exec, "%02x",
+                                       reg->new->data[i]);
+            }
+            reg->last = reg->new;
+            reg->new = temp;
+        }
+    }
+}
+
+/* Log last instruction while checking registers */
+static void vcpu_insn_exec_with_regs(unsigned int cpu_index, void *udata)
+{
+    CPU *cpu = get_cpu(cpu_index);
+
+    /* Print previous instruction in cache */
+    if (cpu->last_exec->len) {
+        if (cpu->registers) {
+            insn_check_regs(cpu_index, cpu);
+        }
+
+        qemu_plugin_outs(cpu->last_exec->str);
+        qemu_plugin_outs("\n");
+    }
+
+    /* Store new instruction in cache */
+    /* vcpu_mem will add memory access information to last_exec */
+    g_string_printf(cpu->last_exec, "%u, ", cpu_index);
+    g_string_append(cpu->last_exec, (char *)udata);
+}
+
+/* Log last instruction while checking registers, ignore next */
+static void vcpu_insn_exec_only_regs(unsigned int cpu_index, void *udata)
+{
+    CPU *cpu = get_cpu(cpu_index);
+
+    /* Print previous instruction in cache */
+    if (cpu->last_exec->len) {
+        if (cpu->registers) {
+            insn_check_regs(cpu_index, cpu);
+        }
+
+        qemu_plugin_outs(cpu->last_exec->str);
+        qemu_plugin_outs("\n");
     }
-    s = g_ptr_array_index(last_exec, cpu_index);
-    g_rw_lock_reader_unlock(&expand_array_lock);
+
+    /* reset */
+    cpu->last_exec->len = 0;
+}
+
+/* Log last instruction without checking regs, setup next */
+static void vcpu_insn_exec(unsigned int cpu_index, void *udata)
+{
+    CPU *cpu = get_cpu(cpu_index);
 
     /* Print previous instruction in cache */
-    if (s->len) {
-        qemu_plugin_outs(s->str);
+    if (cpu->last_exec->len) {
+        qemu_plugin_outs(cpu->last_exec->str);
         qemu_plugin_outs("\n");
     }
 
     /* Store new instruction in cache */
     /* vcpu_mem will add memory access information to last_exec */
-    g_string_printf(s, "%u, ", cpu_index);
-    g_string_append(s, (char *)udata);
+    g_string_printf(cpu->last_exec, "%u, ", cpu_index);
+    g_string_append(cpu->last_exec, (char *)udata);
 }
 
 /**
@@ -111,6 +178,8 @@  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
 {
     struct qemu_plugin_insn *insn;
     bool skip = (imatches || amatches);
+    bool check_regs_this = rmatches;
+    bool check_regs_next = false;
 
     size_t n = qemu_plugin_tb_n_insns(tb);
     for (size_t i = 0; i < n; i++) {
@@ -131,7 +200,8 @@  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
         /*
          * If we are filtering we better check out if we have any
          * hits. The skip "latches" so we can track memory accesses
-         * after the instruction we care about.
+         * after the instruction we care about. Also enable register
+         * checking on the next instruction.
          */
         if (skip && imatches) {
             int j;
@@ -139,6 +209,7 @@  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
                 char *m = g_ptr_array_index(imatches, j);
                 if (g_str_has_prefix(insn_disas, m)) {
                     skip = false;
+                    check_regs_next = rmatches;
                 }
             }
         }
@@ -153,8 +224,39 @@  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
             }
         }
 
+        /*
+         * Check the disassembly to see if a register we care about
+         * will be affected by this instruction. This relies on the
+         * dissembler doing something sensible for the registers we
+         * care about.
+         */
+        if (disas_assist && rmatches) {
+            check_regs_next = false;
+            gchar *args = g_strstr_len(insn_disas, -1, " ");
+            for (int n = 0; n < all_reg_names->len; n++) {
+                gchar *reg = g_ptr_array_index(all_reg_names, n);
+                if (g_strrstr(args, reg)) {
+                    check_regs_next = true;
+                    skip = false;
+                }
+            }
+        }
+
+        /*
+         * We now have 3 choices:
+         *
+         * - Log insn
+         * - Log insn while checking registers
+         * - Don't log this insn but check if last insn changed registers
+         */
+
         if (skip) {
-            g_free(insn_disas);
+            if (check_regs_this) {
+                qemu_plugin_register_vcpu_insn_exec_cb(insn,
+                                                       vcpu_insn_exec_only_regs,
+                                                       QEMU_PLUGIN_CB_R_REGS,
+                                                       NULL);
+            }
         } else {
             uint32_t insn_opcode;
             insn_opcode = *((uint32_t *)qemu_plugin_insn_data(insn));
@@ -167,30 +269,125 @@  static void vcpu_tb_trans(qemu_plugin_id_t id, struct qemu_plugin_tb *tb)
                                              QEMU_PLUGIN_MEM_RW, NULL);
 
             /* Register callback on instruction */
-            qemu_plugin_register_vcpu_insn_exec_cb(insn, vcpu_insn_exec,
-                                                   QEMU_PLUGIN_CB_NO_REGS, output);
+            if (check_regs_this) {
+                qemu_plugin_register_vcpu_insn_exec_cb(
+                    insn, vcpu_insn_exec_with_regs,
+                    QEMU_PLUGIN_CB_R_REGS,
+                    output);
+            } else {
+                qemu_plugin_register_vcpu_insn_exec_cb(
+                    insn, vcpu_insn_exec,
+                    QEMU_PLUGIN_CB_NO_REGS,
+                    output);
+            }
 
             /* reset skip */
             skip = (imatches || amatches);
         }
 
+        /* set regs for next */
+        if (disas_assist && rmatches) {
+            check_regs_this = check_regs_next;
+        }
+
+        g_free(insn_disas);
     }
 }
 
+static Register *init_vcpu_register(int vcpu_index,
+                                    qemu_plugin_reg_descriptor *desc)
+{
+    Register *reg = g_new0(Register, 1);
+    g_autofree gchar *lower = g_utf8_strdown(desc->name, -1);
+    int r;
+
+    reg->handle = desc->handle;
+    reg->name = g_intern_string(lower);
+    reg->last = g_byte_array_new();
+    reg->new = g_byte_array_new();
+
+    /* read the initial value */
+    r = qemu_plugin_read_register(vcpu_index, reg->handle, reg->last);
+    g_assert(r > 0);
+    return reg;
+}
+
+static GPtrArray *registers_init(int vcpu_index)
+{
+    g_autoptr(GPtrArray) registers = g_ptr_array_new();
+    g_autoptr(GArray) reg_list = qemu_plugin_get_registers(vcpu_index);
+
+    if (rmatches && reg_list && reg_list->len) {
+        /*
+         * Go through each register in the complete list and
+         * see if we want to track it.
+         */
+        for (int r = 0; r < reg_list->len; r++) {
+            qemu_plugin_reg_descriptor *rd = &g_array_index(
+                reg_list, qemu_plugin_reg_descriptor, r);
+            for (int p = 0; p < rmatches->len; p++) {
+                g_autoptr(GPatternSpec) pat = g_pattern_spec_new(rmatches->pdata[p]);
+                g_autofree gchar *rd_lower = g_utf8_strdown(rd->name, -1);
+                if (g_pattern_match_string(pat, rd->name) ||
+                    g_pattern_match_string(pat, rd_lower)) {
+                    Register *reg = init_vcpu_register(vcpu_index, rd);
+                    g_ptr_array_add(registers, reg);
+
+                    /* we need a list of regnames at TB translation time */
+                    if (disas_assist) {
+                        g_mutex_lock(&add_reg_name_lock);
+                        if (!g_ptr_array_find(all_reg_names, reg->name, NULL)) {
+                            g_ptr_array_add(all_reg_names, reg->name);
+                        }
+                        g_mutex_unlock(&add_reg_name_lock);
+                    }
+                }
+            }
+        }
+    }
+
+    return registers->len ? g_steal_pointer(&registers) : NULL;
+}
+
+/*
+ * Initialise a new vcpu/thread with:
+ *   - last_exec tracking data
+ *   - list of tracked registers
+ *   - initial value of registers
+ *
+ * As we could have multiple threads trying to do this we need to
+ * serialise the expansion under a lock.
+ */
+static void vcpu_init(qemu_plugin_id_t id, unsigned int vcpu_index)
+{
+    CPU *c;
+
+    g_rw_lock_writer_lock(&expand_array_lock);
+    if (vcpu_index >= cpus->len) {
+        g_array_set_size(cpus, vcpu_index + 1);
+    }
+    g_rw_lock_writer_unlock(&expand_array_lock);
+
+    c = get_cpu(vcpu_index);
+    c->last_exec = g_string_new(NULL);
+    c->registers = registers_init(vcpu_index);
+}
+
 /**
  * On plugin exit, print last instruction in cache
  */
 static void plugin_exit(qemu_plugin_id_t id, void *p)
 {
     guint i;
-    GString *s;
-    for (i = 0; i < last_exec->len; i++) {
-        s = g_ptr_array_index(last_exec, i);
-        if (s->str) {
-            qemu_plugin_outs(s->str);
+    g_rw_lock_reader_lock(&expand_array_lock);
+    for (i = 0; i < cpus->len; i++) {
+        CPU *c = get_cpu(i);
+        if (c->last_exec && c->last_exec->str) {
+            qemu_plugin_outs(c->last_exec->str);
             qemu_plugin_outs("\n");
         }
     }
+    g_rw_lock_reader_unlock(&expand_array_lock);
 }
 
 /* Add a match to the array of matches */
@@ -212,6 +409,18 @@  static void parse_vaddr_match(char *match)
     g_array_append_val(amatches, v);
 }
 
+/*
+ * We have to wait until vCPUs are started before we can check the
+ * patterns find anything.
+ */
+static void add_regpat(char *regpat)
+{
+    if (!rmatches) {
+        rmatches = g_ptr_array_new();
+    }
+    g_ptr_array_add(rmatches, g_strdup(regpat));
+}
+
 /**
  * Install the plugin
  */
@@ -223,11 +432,8 @@  QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
      * Initialize dynamic array to cache vCPU instruction. In user mode
      * we don't know the size before emulation.
      */
-    if (info->system_emulation) {
-        last_exec = g_ptr_array_sized_new(info->system.max_vcpus);
-    } else {
-        last_exec = g_ptr_array_new();
-    }
+    cpus = g_array_sized_new(true, true, sizeof(CPU),
+                             info->system_emulation ? info->system.max_vcpus : 1);
 
     for (int i = 0; i < argc; i++) {
         char *opt = argv[i];
@@ -236,13 +442,22 @@  QEMU_PLUGIN_EXPORT int qemu_plugin_install(qemu_plugin_id_t id,
             parse_insn_match(tokens[1]);
         } else if (g_strcmp0(tokens[0], "afilter") == 0) {
             parse_vaddr_match(tokens[1]);
+        } else if (g_strcmp0(tokens[0], "reg") == 0) {
+            add_regpat(tokens[1]);
+        } else if (g_strcmp0(tokens[0], "rdisas") == 0) {
+            if (!qemu_plugin_bool_parse(tokens[0], tokens[1], &disas_assist)) {
+                fprintf(stderr, "boolean argument parsing failed: %s\n", opt);
+                return -1;
+            }
+            all_reg_names = g_ptr_array_new();
         } else {
             fprintf(stderr, "option parsing failed: %s\n", opt);
             return -1;
         }
     }
 
-    /* Register translation block and exit callbacks */
+    /* Register init, translation block and exit callbacks */
+    qemu_plugin_register_vcpu_init_cb(id, vcpu_init);
     qemu_plugin_register_vcpu_tb_trans_cb(id, vcpu_tb_trans);
     qemu_plugin_register_atexit_cb(id, plugin_exit, NULL);