diff mbox series

[v3,18/28] tcg: Tidy tcg_n_regions

Message ID 20210502231844.1977630-19-richard.henderson@linaro.org
State Superseded
Headers show
Series tcg: Clean up code_gen_buffer allocation | expand

Commit Message

Richard Henderson May 2, 2021, 11:18 p.m. UTC
Compute the value using straight division and bounds,
rather than a loop.  Pass in tb_size rather than reading
from tcg_init_ctx.code_gen_buffer_size,

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 tcg/region.c | 29 ++++++++++++-----------------
 1 file changed, 12 insertions(+), 17 deletions(-)

-- 
2.25.1

Comments

Alex Bennée June 8, 2021, 4:06 p.m. UTC | #1
Richard Henderson <richard.henderson@linaro.org> writes:

> Compute the value using straight division and bounds,

> rather than a loop.  Pass in tb_size rather than reading

> from tcg_init_ctx.code_gen_buffer_size,

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  tcg/region.c | 29 ++++++++++++-----------------

>  1 file changed, 12 insertions(+), 17 deletions(-)

>

> diff --git a/tcg/region.c b/tcg/region.c

> index bd81b35359..b44246e1aa 100644

> --- a/tcg/region.c

> +++ b/tcg/region.c

> @@ -363,38 +363,33 @@ void tcg_region_reset_all(void)

>      tcg_region_tree_reset_all();

>  }

>  

> -static size_t tcg_n_regions(unsigned max_cpus)

> +static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)

>  {

>  #ifdef CONFIG_USER_ONLY

>      return 1;

>  #else

> +    size_t n_regions;

> +

>      /*

>       * It is likely that some vCPUs will translate more code than others,

>       * so we first try to set more regions than max_cpus, with those regions

>       * being of reasonable size. If that's not possible we make do by evenly

>       * dividing the code_gen_buffer among the vCPUs.

>       */

> -    size_t i;

> -

>      /* Use a single region if all we have is one vCPU thread */

>      if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {

>          return 1;

>      }

>  

> -    /* Try to have more regions than max_cpus, with each region being >= 2 MB */

> -    for (i = 8; i > 0; i--) {

> -        size_t regions_per_thread = i;

> -        size_t region_size;

> -

> -        region_size = tcg_init_ctx.code_gen_buffer_size;

> -        region_size /= max_cpus * regions_per_thread;

> -

> -        if (region_size >= 2 * 1024u * 1024) {

> -            return max_cpus * regions_per_thread;

> -        }

> +    /*

> +     * Try to have more regions than max_cpus, with each region being >= 2 MB.

> +     * If we can't, then just allocate one region per vCPU thread.

> +     */

> +    n_regions = tb_size / (2 * MiB);

> +    if (n_regions <= max_cpus) {

> +        return max_cpus;

>      }

> -    /* If we can't, then just allocate one region per vCPU thread */

> -    return max_cpus;

> +    return MIN(n_regions, max_cpus * 8);

>  #endif

>  }


This is so much easier to follow now ;-)

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>


-- 
Alex Bennée
Luis Fernando Fujita Pires June 9, 2021, 2:58 p.m. UTC | #2
From: Richard Henderson <richard.henderson@linaro.org>

> Compute the value using straight division and bounds, rather than a loop.  Pass

> in tb_size rather than reading from tcg_init_ctx.code_gen_buffer_size,

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  tcg/region.c | 29 ++++++++++++-----------------

>  1 file changed, 12 insertions(+), 17 deletions(-)


Reviewed-by: Luis Pires <luis.pires@eldorado.org.br>


--
Luis Pires
Instituto de Pesquisas ELDORADO
Aviso Legal - Disclaimer <https://www.eldorado.org.br/disclaimer.html>
diff mbox series

Patch

diff --git a/tcg/region.c b/tcg/region.c
index bd81b35359..b44246e1aa 100644
--- a/tcg/region.c
+++ b/tcg/region.c
@@ -363,38 +363,33 @@  void tcg_region_reset_all(void)
     tcg_region_tree_reset_all();
 }
 
-static size_t tcg_n_regions(unsigned max_cpus)
+static size_t tcg_n_regions(size_t tb_size, unsigned max_cpus)
 {
 #ifdef CONFIG_USER_ONLY
     return 1;
 #else
+    size_t n_regions;
+
     /*
      * It is likely that some vCPUs will translate more code than others,
      * so we first try to set more regions than max_cpus, with those regions
      * being of reasonable size. If that's not possible we make do by evenly
      * dividing the code_gen_buffer among the vCPUs.
      */
-    size_t i;
-
     /* Use a single region if all we have is one vCPU thread */
     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
         return 1;
     }
 
-    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
-    for (i = 8; i > 0; i--) {
-        size_t regions_per_thread = i;
-        size_t region_size;
-
-        region_size = tcg_init_ctx.code_gen_buffer_size;
-        region_size /= max_cpus * regions_per_thread;
-
-        if (region_size >= 2 * 1024u * 1024) {
-            return max_cpus * regions_per_thread;
-        }
+    /*
+     * Try to have more regions than max_cpus, with each region being >= 2 MB.
+     * If we can't, then just allocate one region per vCPU thread.
+     */
+    n_regions = tb_size / (2 * MiB);
+    if (n_regions <= max_cpus) {
+        return max_cpus;
     }
-    /* If we can't, then just allocate one region per vCPU thread */
-    return max_cpus;
+    return MIN(n_regions, max_cpus * 8);
 #endif
 }
 
@@ -828,7 +823,7 @@  void tcg_region_init(size_t tb_size, int splitwx, unsigned max_cpus)
     buf = tcg_init_ctx.code_gen_buffer;
     total_size = tcg_init_ctx.code_gen_buffer_size;
     page_size = qemu_real_host_page_size;
-    n_regions = tcg_n_regions(max_cpus);
+    n_regions = tcg_n_regions(total_size, max_cpus);
 
     /* The first region will be 'aligned - buf' bytes larger than the others */
     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);