diff mbox series

[01/12] plugins: implement inline operation with cpu_index offset

Message ID 20240111142326.1743444-2-pierrick.bouvier@linaro.org
State Superseded
Headers show
Series TCG Plugin inline operation enhancement | expand

Commit Message

Pierrick Bouvier Jan. 11, 2024, 2:23 p.m. UTC
Instead of working on a fixed memory location, allow an inline operation
to index it by cpu_index and a given offset (ptr + cpu_index * offset).
The current semantics are unchanged because a 0 offset is used, so an
inline operation still always targets the same memory location.

Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
---
 accel/tcg/plugin-gen.c | 60 +++++++++++++++++++++++++++++++++++-------
 include/qemu/plugin.h  |  1 +
 plugins/api.c          |  7 ++---
 plugins/core.c         | 11 +++++---
 plugins/plugin.h       |  5 ++--
 5 files changed, 65 insertions(+), 19 deletions(-)

Comments

Richard Henderson Jan. 11, 2024, 10:04 p.m. UTC | #1
On 1/12/24 01:23, Pierrick Bouvier wrote:
> Instead of working on a fixed memory location, allow to index it based
> on cpu_index and a given offset (ptr + cpu_index * offset).
> Current semantic is not modified as we use a 0 offset, thus inline
> operation still targets always the same memory location.
> 
> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
> ---
>   accel/tcg/plugin-gen.c | 60 +++++++++++++++++++++++++++++++++++-------
>   include/qemu/plugin.h  |  1 +
>   plugins/api.c          |  7 ++---
>   plugins/core.c         | 11 +++++---
>   plugins/plugin.h       |  5 ++--
>   5 files changed, 65 insertions(+), 19 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

For the to-do list: add mul -> shl strength reduction in fold_mul().


r~
Pierrick Bouvier Jan. 12, 2024, 2:27 p.m. UTC | #2
On 1/12/24 02:04, Richard Henderson wrote:
> On 1/12/24 01:23, Pierrick Bouvier wrote:
>> Instead of working on a fixed memory location, allow to index it based
>> on cpu_index and a given offset (ptr + cpu_index * offset).
>> Current semantic is not modified as we use a 0 offset, thus inline
>> operation still targets always the same memory location.
>>
>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>> ---
>>    accel/tcg/plugin-gen.c | 60 +++++++++++++++++++++++++++++++++++-------
>>    include/qemu/plugin.h  |  1 +
>>    plugins/api.c          |  7 ++---
>>    plugins/core.c         | 11 +++++---
>>    plugins/plugin.h       |  5 ++--
>>    5 files changed, 65 insertions(+), 19 deletions(-)
> 
> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
> 
> For the to-do list: add mul -> shl strength reduction in fold_mul().
>

Would you like me to add a todo somewhere about it? Or is it a reminder 
for follow-up work?

> 
> r~
Richard Henderson Jan. 12, 2024, 10:22 p.m. UTC | #3
On 1/13/24 01:27, Pierrick Bouvier wrote:
> On 1/12/24 02:04, Richard Henderson wrote:
>> On 1/12/24 01:23, Pierrick Bouvier wrote:
>>> Instead of working on a fixed memory location, allow to index it based
>>> on cpu_index and a given offset (ptr + cpu_index * offset).
>>> Current semantic is not modified as we use a 0 offset, thus inline
>>> operation still targets always the same memory location.
>>>
>>> Signed-off-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
>>> ---
>>>    accel/tcg/plugin-gen.c | 60 +++++++++++++++++++++++++++++++++++-------
>>>    include/qemu/plugin.h  |  1 +
>>>    plugins/api.c          |  7 ++---
>>>    plugins/core.c         | 11 +++++---
>>>    plugins/plugin.h       |  5 ++--
>>>    5 files changed, 65 insertions(+), 19 deletions(-)
>>
>> Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
>>
>> For the to-do list: add mul -> shl strength reduction in fold_mul().
>>
> 
> Would you like me to add a todo somewhere about it? Or is it a reminder for follow-up work?

It's a reminder to myself for follow-up work.


r~
diff mbox series

Patch

diff --git a/accel/tcg/plugin-gen.c b/accel/tcg/plugin-gen.c
index 78b331b2510..fc9d3ee23bc 100644
--- a/accel/tcg/plugin-gen.c
+++ b/accel/tcg/plugin-gen.c
@@ -118,16 +118,28 @@  static void gen_empty_udata_cb(void)
  */
 static void gen_empty_inline_cb(void)
 {
+    TCGv_i32 cpu_index = tcg_temp_ebb_new_i32();
+    TCGv_ptr cpu_index_as_ptr = tcg_temp_ebb_new_ptr();
     TCGv_i64 val = tcg_temp_ebb_new_i64();
     TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
 
+    tcg_gen_ld_i32(cpu_index, tcg_env,
+                   -offsetof(ArchCPU, env) + offsetof(CPUState, cpu_index));
+    /* pass an immediate != 0 so that it doesn't get optimized away */
+    tcg_gen_muli_i32(cpu_index, cpu_index, 0xdeadbeef);
+    tcg_gen_ext_i32_ptr(cpu_index_as_ptr, cpu_index);
+
     tcg_gen_movi_ptr(ptr, 0);
+    tcg_gen_add_ptr(ptr, ptr, cpu_index_as_ptr);
     tcg_gen_ld_i64(val, ptr, 0);
     /* pass an immediate != 0 so that it doesn't get optimized away */
     tcg_gen_addi_i64(val, val, 0xdeadface);
+
     tcg_gen_st_i64(val, ptr, 0);
     tcg_temp_free_ptr(ptr);
     tcg_temp_free_i64(val);
+    tcg_temp_free_ptr(cpu_index_as_ptr);
+    tcg_temp_free_i32(cpu_index);
 }
 
 static void gen_empty_mem_cb(TCGv_i64 addr, uint32_t info)
@@ -274,12 +286,37 @@  static TCGOp *copy_const_ptr(TCGOp **begin_op, TCGOp *op, void *ptr)
     return op;
 }
 
+static TCGOp *copy_ld_i32(TCGOp **begin_op, TCGOp *op)
+{
+    return copy_op(begin_op, op, INDEX_op_ld_i32);
+}
+
+static TCGOp *copy_ext_i32_ptr(TCGOp **begin_op, TCGOp *op)
+{
+    if (UINTPTR_MAX == UINT32_MAX) {
+        op = copy_op(begin_op, op, INDEX_op_mov_i32);
+    } else {
+        op = copy_op(begin_op, op, INDEX_op_ext_i32_i64);
+    }
+    return op;
+}
+
+static TCGOp *copy_add_ptr(TCGOp **begin_op, TCGOp *op)
+{
+    if (UINTPTR_MAX == UINT32_MAX) {
+        op = copy_op(begin_op, op, INDEX_op_add_i32);
+    } else {
+        op = copy_op(begin_op, op, INDEX_op_add_i64);
+    }
+    return op;
+}
+
 static TCGOp *copy_ld_i64(TCGOp **begin_op, TCGOp *op)
 {
     if (TCG_TARGET_REG_BITS == 32) {
         /* 2x ld_i32 */
-        op = copy_op(begin_op, op, INDEX_op_ld_i32);
-        op = copy_op(begin_op, op, INDEX_op_ld_i32);
+        op = copy_ld_i32(begin_op, op);
+        op = copy_ld_i32(begin_op, op);
     } else {
         /* ld_i64 */
         op = copy_op(begin_op, op, INDEX_op_ld_i64);
@@ -315,6 +352,13 @@  static TCGOp *copy_add_i64(TCGOp **begin_op, TCGOp *op, uint64_t v)
     return op;
 }
 
+static TCGOp *copy_mul_i32(TCGOp **begin_op, TCGOp *op, uint32_t v)
+{
+    op = copy_op(begin_op, op, INDEX_op_mul_i32);
+    op->args[2] = tcgv_i32_arg(tcg_constant_i32(v));
+    return op;
+}
+
 static TCGOp *copy_st_ptr(TCGOp **begin_op, TCGOp *op)
 {
     if (UINTPTR_MAX == UINT32_MAX) {
@@ -380,18 +424,14 @@  static TCGOp *append_inline_cb(const struct qemu_plugin_dyn_cb *cb,
                                TCGOp *begin_op, TCGOp *op,
                                int *unused)
 {
-    /* const_ptr */
+    op = copy_ld_i32(&begin_op, op);
+    op = copy_mul_i32(&begin_op, op, cb->userp_offset);
+    op = copy_ext_i32_ptr(&begin_op, op);
     op = copy_const_ptr(&begin_op, op, cb->userp);
-
-    /* ld_i64 */
+    op = copy_add_ptr(&begin_op, op);
     op = copy_ld_i64(&begin_op, op);
-
-    /* add_i64 */
     op = copy_add_i64(&begin_op, op, cb->inline_insn.imm);
-
-    /* st_i64 */
     op = copy_st_i64(&begin_op, op);
-
     return op;
 }
 
diff --git a/include/qemu/plugin.h b/include/qemu/plugin.h
index 7fdc3a4849f..4548affc295 100644
--- a/include/qemu/plugin.h
+++ b/include/qemu/plugin.h
@@ -85,6 +85,7 @@  enum plugin_dyn_cb_subtype {
 struct qemu_plugin_dyn_cb {
     union qemu_plugin_cb_sig f;
     void *userp;
+    size_t userp_offset;
     enum plugin_dyn_cb_subtype type;
     /* @rw applies to mem callbacks only (both regular and inline) */
     enum qemu_plugin_mem_rw rw;
diff --git a/plugins/api.c b/plugins/api.c
index 5521b0ad36c..0fcce825680 100644
--- a/plugins/api.c
+++ b/plugins/api.c
@@ -99,7 +99,8 @@  void qemu_plugin_register_vcpu_tb_exec_inline(struct qemu_plugin_tb *tb,
                                               void *ptr, uint64_t imm)
 {
     if (!tb->mem_only) {
-        plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE], 0, op, ptr, imm);
+        plugin_register_inline_op(&tb->cbs[PLUGIN_CB_INLINE],
+                                  0, op, ptr, 0, imm);
     }
 }
 
@@ -120,7 +121,7 @@  void qemu_plugin_register_vcpu_insn_exec_inline(struct qemu_plugin_insn *insn,
 {
     if (!insn->mem_only) {
         plugin_register_inline_op(&insn->cbs[PLUGIN_CB_INSN][PLUGIN_CB_INLINE],
-                                  0, op, ptr, imm);
+                                  0, op, ptr, 0, imm);
     }
 }
 
@@ -145,7 +146,7 @@  void qemu_plugin_register_vcpu_mem_inline(struct qemu_plugin_insn *insn,
                                           uint64_t imm)
 {
     plugin_register_inline_op(&insn->cbs[PLUGIN_CB_MEM][PLUGIN_CB_INLINE],
-                              rw, op, ptr, imm);
+                              rw, op, ptr, 0, imm);
 }
 
 void qemu_plugin_register_vcpu_tb_trans_cb(qemu_plugin_id_t id,
diff --git a/plugins/core.c b/plugins/core.c
index 49588285dd0..cc6d7720b1f 100644
--- a/plugins/core.c
+++ b/plugins/core.c
@@ -280,13 +280,15 @@  static struct qemu_plugin_dyn_cb *plugin_get_dyn_cb(GArray **arr)
 
 void plugin_register_inline_op(GArray **arr,
                                enum qemu_plugin_mem_rw rw,
-                               enum qemu_plugin_op op, void *ptr,
+                               enum qemu_plugin_op op,
+                               void *ptr, size_t offset,
                                uint64_t imm)
 {
     struct qemu_plugin_dyn_cb *dyn_cb;
 
     dyn_cb = plugin_get_dyn_cb(arr);
     dyn_cb->userp = ptr;
+    dyn_cb->userp_offset = offset;
     dyn_cb->type = PLUGIN_CB_INLINE;
     dyn_cb->rw = rw;
     dyn_cb->inline_insn.op = op;
@@ -431,9 +433,10 @@  void qemu_plugin_flush_cb(void)
     plugin_cb__simple(QEMU_PLUGIN_EV_FLUSH);
 }
 
-void exec_inline_op(struct qemu_plugin_dyn_cb *cb)
+void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index)
 {
-    uint64_t *val = cb->userp;
+    const size_t offset = cpu_index * cb->userp_offset;
+    uint64_t *val = (uint64_t *)((char *) cb->userp + offset);
 
     switch (cb->inline_insn.op) {
     case QEMU_PLUGIN_INLINE_ADD_U64:
@@ -466,7 +469,7 @@  void qemu_plugin_vcpu_mem_cb(CPUState *cpu, uint64_t vaddr,
                            vaddr, cb->userp);
             break;
         case PLUGIN_CB_INLINE:
-            exec_inline_op(cb);
+            exec_inline_op(cb, cpu->cpu_index);
             break;
         default:
             g_assert_not_reached();
diff --git a/plugins/plugin.h b/plugins/plugin.h
index 5eb2fdbc85e..e597ef3c30e 100644
--- a/plugins/plugin.h
+++ b/plugins/plugin.h
@@ -66,7 +66,8 @@  struct qemu_plugin_ctx *plugin_id_to_ctx_locked(qemu_plugin_id_t id);
 
 void plugin_register_inline_op(GArray **arr,
                                enum qemu_plugin_mem_rw rw,
-                               enum qemu_plugin_op op, void *ptr,
+                               enum qemu_plugin_op op,
+                               void *ptr, size_t offset,
                                uint64_t imm);
 
 void plugin_reset_uninstall(qemu_plugin_id_t id,
@@ -95,6 +96,6 @@  void plugin_register_vcpu_mem_cb(GArray **arr,
                                  enum qemu_plugin_mem_rw rw,
                                  void *udata);
 
-void exec_inline_op(struct qemu_plugin_dyn_cb *cb);
+void exec_inline_op(struct qemu_plugin_dyn_cb *cb, int cpu_index);
 
 #endif /* PLUGIN_H */