diff mbox series

[v3,13/28] accel/tcg: Pass down max_cpus to tcg_init

Message ID 20210502231844.1977630-14-richard.henderson@linaro.org
State New
Headers show
Series tcg: Clean up code_gen_buffer allocation | expand

Commit Message

Richard Henderson May 2, 2021, 11:18 p.m. UTC
Start removing the include of hw/boards.h from tcg/.
Pass down the max_cpus value from tcg_init_machine,
where we have the MachineState already.

Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 include/tcg/tcg.h   |  2 +-
 tcg/internal.h      |  2 +-
 accel/tcg/tcg-all.c | 10 +++++++++-
 tcg/region.c        | 32 +++++++++++---------------------
 tcg/tcg.c           | 10 ++++------
 5 files changed, 26 insertions(+), 30 deletions(-)

-- 
2.25.1

Comments

Alex Bennée June 8, 2021, 3:44 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Start removing the include of hw/boards.h from tcg/.

> Pass down the max_cpus value from tcg_init_machine,

> where we have the MachineState already.

>

> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

<snip>
> -static size_t tcg_n_regions(void)

> -{

> +    /*

> +     * It is likely that some vCPUs will translate more code than others,

> +     * so we first try to set more regions than max_cpus, with those regions

> +     * being of reasonable size. If that's not possible we make do by evenly

> +     * dividing the code_gen_buffer among the vCPUs.

> +     */

>      size_t i;

>


heh TIL. That said currently this "fancy" allocation method does nothing
for user-mode code generation which just gets to share one big code gen
buffer.

Thinking aloud I do recall an alternative approach is to have regions
more closely aligned with the guest memory map with a region lock which
any thread can acquire when it needs generate code for the guest region.

In user-mode for example that would be a region per DSO. In system mode
it may be simpler to have the current per-vCPU layout but consider if
all kernel mode code is generated in one buffer and can patch jumps
within that region? Of course any TLB flush that hits code in that
region would force regeneration of all that code but it shouldn't happen
for kernel mode code.

Anyway random musings aside:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>



-- 
Alex Bennée
Luis Fernando Fujita Pires June 9, 2021, 2:58 p.m. UTC | #2
From: Richard Henderson <richard.henderson@linaro.org>

> Start removing the include of hw/boards.h from tcg/.

> Pass down the max_cpus value from tcg_init_machine, where we have the

> MachineState already.

> 

> Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  include/tcg/tcg.h   |  2 +-

>  tcg/internal.h      |  2 +-

>  accel/tcg/tcg-all.c | 10 +++++++++-

>  tcg/region.c        | 32 +++++++++++---------------------

>  tcg/tcg.c           | 10 ++++------

>  5 files changed, 26 insertions(+), 30 deletions(-)


Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>


--
Luis Pires
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
diff mbox series

Patch

diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 3ad77ec34d..a0122c0dd3 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -906,7 +906,7 @@  static inline void *tcg_malloc(int size)
     }
 }
 
-void tcg_init(size_t tb_size, int splitwx);
+void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus);
 void tcg_register_thread(void);
 void tcg_prologue_init(TCGContext *s);
 void tcg_func_start(TCGContext *s);
diff --git a/tcg/internal.h b/tcg/internal.h
index f13c564d9b..fcfeca232f 100644
--- a/tcg/internal.h
+++ b/tcg/internal.h
@@ -30,7 +30,7 @@ 
 extern TCGContext **tcg_ctxs;
 extern unsigned int n_tcg_ctxs;
 
-void tcg_region_init(size_t tb_size, int splitwx);
+void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus);
 bool tcg_region_alloc(TCGContext *s);
 void tcg_region_initial_alloc(TCGContext *s);
 void tcg_region_prologue_set(TCGContext *s);
diff --git a/accel/tcg/tcg-all.c b/accel/tcg/tcg-all.c
index 0e83acbfe5..d2f2ddb844 100644
--- a/accel/tcg/tcg-all.c
+++ b/accel/tcg/tcg-all.c
@@ -32,6 +32,9 @@ 
 #include "qemu/error-report.h"
 #include "qemu/accel.h"
 #include "qapi/qapi-builtin-visit.h"
+#if !defined(CONFIG_USER_ONLY)
+#include "hw/boards.h"
+#endif
 #include "internal.h"
 
 struct TCGState {
@@ -109,13 +112,18 @@  bool mttcg_enabled;
 static int tcg_init_machine(MachineState *ms)
 {
     TCGState *s = TCG_STATE(current_accel());
+#ifdef CONFIG_USER_ONLY
+    unsigned max_cpus = 1;
+#else
+    unsigned max_cpus = ms->smp.max_cpus;
+#endif
 
     tcg_allowed = true;
     mttcg_enabled = s->mttcg_enabled;
 
     page_init();
     tb_htable_init();
-    tcg_init(s->tb_size * 1024 * 1024, s->splitwx_enabled);
+    tcg_init(s->tb_size * 1024 * 1024, s->splitwx_enabled, max_cpus);
 
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and
diff --git a/tcg/region.c b/tcg/region.c
index ddcbf7113e..f9e93e85b3 100644
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -27,9 +27,6 @@ 
 #include "qapi/error.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg.h"
-#if !defined(CONFIG_USER_ONLY)
-#include "hw/boards.h"
-#endif
 #include "internal.h"
 
 
@@ -366,27 +363,20 @@  void tcg_region_reset_all(void)
     tcg_region_tree_reset_all();
 }
 
+static size_t tcg_n_regions(unsigned max_cpus)
+{
 #ifdef CONFIG_USER_ONLY
-static size_t tcg_n_regions(void)
-{
     return 1;
-}
 #else
-/*
- * It is likely that some vCPUs will translate more code than others, so we
- * first try to set more regions than max_cpus, with those regions being of
- * reasonable size. If that's not possible we make do by evenly dividing
- * the code_gen_buffer among the vCPUs.
- */
-static size_t tcg_n_regions(void)
-{
+    /*
+     * It is likely that some vCPUs will translate more code than others,
+     * so we first try to set more regions than max_cpus, with those regions
+     * being of reasonable size. If that's not possible we make do by evenly
+     * dividing the code_gen_buffer among the vCPUs.
+     */
     size_t i;
 
     /* Use a single region if all we have is one vCPU thread */
-#if !defined(CONFIG_USER_ONLY)
-    MachineState *ms = MACHINE(qdev_get_machine());
-    unsigned int max_cpus = ms->smp.max_cpus;
-#endif
     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
         return 1;
     }
@@ -405,8 +395,8 @@  static size_t tcg_n_regions(void)
     }
     /* If we can't, then just allocate one region per vCPU thread */
     return max_cpus;
-}
 #endif
+}
 
 /* Minimum size of the code gen buffer.  This number is randomly chosen,
    but not so small that we can't have a fair number of TB's live.  */
@@ -838,7 +828,7 @@  static bool alloc_code_gen_buffer(size_t size, int splitwx, Error **errp)
  * in practice. Multi-threaded guests share most if not all of their translated
  * code, which makes parallel code generation less appealing than in softmmu.
  */
-void tcg_region_init(size_t tb_size, int splitwx)
+void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
 {
     void *buf, *aligned;
     size_t size;
@@ -855,7 +845,7 @@  void tcg_region_init(size_t tb_size, int splitwx)
     buf = tcg_init_ctx.code_gen_buffer;
     size = tcg_init_ctx.code_gen_buffer_size;
     page_size = qemu_real_host_page_size;
-    n_regions = tcg_n_regions();
+    n_regions = tcg_n_regions(max_cpus);
 
     /* The first region will be 'aligned - buf' bytes larger than the others */
     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 92749a2f8b..5af51d41ee 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -579,7 +579,7 @@  static void process_op_defs(TCGContext *s);
 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                             TCGReg reg, const char *name);
 
-static void tcg_context_init(void)
+static void tcg_context_init(unsigned max_cpus)
 {
     TCGContext *s = &tcg_init_ctx;
     int op, total_args, n, i;
@@ -648,8 +648,6 @@  static void tcg_context_init(void)
     tcg_ctxs = &tcg_ctx;
     n_tcg_ctxs = 1;
 #else
-    MachineState *ms = MACHINE(qdev_get_machine());
-    unsigned int max_cpus = ms->smp.max_cpus;
     tcg_ctxs = g_new(TCGContext *, max_cpus);
 #endif
 
@@ -658,10 +656,10 @@  static void tcg_context_init(void)
     cpu_env = temp_tcgv_ptr(ts);
 }
 
-void tcg_init(size_t tb_size, int splitwx)
+void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
 {
-    tcg_context_init();
-    tcg_region_init(tb_size, splitwx);
+    tcg_context_init(max_cpus);
+    tcg_region_init(tb_size, splitwx, max_cpus);
 }
 
 /*