accel/tcg: remove link between guest ram and TCG cache size

Message ID 20200226152710.31751-1-alex.bennee@linaro.org
State New
Headers show
Series
  • accel/tcg: remove link between guest ram and TCG cache size
Related show

Commit Message

Alex Bennée Feb. 26, 2020, 3:27 p.m.
Basing the TB cache size on the ram_size was always a little heuristic
and was broken by a1b18df9a4 which caused ram_size not to be fully
realised at the time we initialise the TCG translation cache.

At the same time the default code generation size seems mainly set to
deal with the fact we use a static code buffer for CONFIG_USER to
avoid mmap allocation problems on constrained systems. So we:

  - only use a static code buffer on 32 bit systems
  - up the default buffer size for bigger systems
  - ignore the ram_size and just go with the default
  - document the fact tb-size is ignored for 32 bit linux-user

This could potentially slow down softmmu emulation on 32 bit systems
with lots (3gb?) of spare memory. Those users can still manually up
the tb-size via the command line if they do in fact exist.

Fixes: a1b18df9a4
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Cc: Niek Linnenbank <nieklinnenbank@gmail.com>
Cc: Igor Mammedov <imammedo@redhat.com>
---
 accel/tcg/translate-all.c | 23 ++++++++++-------------
 qemu-options.hx           |  3 ++-
 2 files changed, 12 insertions(+), 14 deletions(-)

-- 
2.20.1

Comments

Igor Mammedov Feb. 26, 2020, 3:56 p.m. | #1
On Wed, 26 Feb 2020 15:27:10 +0000
Alex Bennée <alex.bennee@linaro.org> wrote:

> Basing the TB cache size on the ram_size was always a little heuristic

> and was broken by a1b18df9a4 which caused ram_size not to be fully

> realised at the time we initialise the TCG translation cache.

> 

> At the same time the default code generation size seems mainly set to

> deal with the fact we use a static code buffer for CONFIG_USER to

> avoid mmap allocation problems on constrained systems. So we:

> 

>   - only use a static code buffer on 32 bit systems

>   - up the default buffer size for bigger systems

>   - ignore the ram_size and just go with the default

>   - document the fact tb-size is ignored for 32 bit linux-user

> 

> The could potentially slow down softmmu emulation on 32 bit systems

> with lots (3gb?) of spare memory. Those users can still manually up

> the tb-size via the command line if they do in fact exist.

> 

> Fixes: a1b18df9a4

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Cc: Niek Linnenbank <nieklinnenbank@gmail.com>

> Cc: Igor Mammedov <imammedo@redhat.com>

> ---

>  accel/tcg/translate-all.c | 23 ++++++++++-------------

>  qemu-options.hx           |  3 ++-

>  2 files changed, 12 insertions(+), 14 deletions(-)

> 

> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c

> index a08ab11f657..cdfa2db7c56 100644

> --- a/accel/tcg/translate-all.c

> +++ b/accel/tcg/translate-all.c

> @@ -891,11 +891,12 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,

>      }

>  }

>  

> -#if defined(CONFIG_USER_ONLY)

> -/* Currently it is not recommended to allocate big chunks of data in

> -   user mode. It will change when a dedicated libc will be used.  */

> -/* ??? 64-bit hosts ought to have no problem mmaping data outside the

> -   region in which the guest needs to run.  Revisit this.  */

> +#if defined(CONFIG_USER_ONLY) && TCG_TARGET_REG_BITS == 32

> +/*

> + * For user mode on smaller 32 bit systems we may run into trouble

> + * allocating big chunks of data in the right place. On these systems

> + * we utilise a static code generation buffer directly in the binary.

> + */

>  #define USE_STATIC_CODE_GEN_BUFFER

>  #endif

>  

> @@ -927,7 +928,11 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,

>  # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)

>  #endif

>  

> +#if TCG_TARGET_REG_BITS == 32

>  #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)

> +#else

> +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (2ul * 1024 * 1024 * 1024)


I wonder how resource-constrained CI VMs are going to react to this jump
from the current default buffer size (32M, i.e. a quarter of the default
128M ram size) to 2G.

> +#endif

>  

>  #define DEFAULT_CODE_GEN_BUFFER_SIZE \

>    (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \

> @@ -937,15 +942,7 @@ static inline size_t size_code_gen_buffer(size_t tb_size)

>  {

>      /* Size the buffer.  */

>      if (tb_size == 0) {

> -#ifdef USE_STATIC_CODE_GEN_BUFFER

>          tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;

> -#else

> -        /* ??? Needs adjustments.  */

> -        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the

> -           static buffer, we could size this on RESERVED_VA, on the text

> -           segment size of the executable, or continue to use the default.  */

> -        tb_size = (unsigned long)(ram_size / 4);

> -#endif

>      }

>      if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {

>          tb_size = MIN_CODE_GEN_BUFFER_SIZE;

> diff --git a/qemu-options.hx b/qemu-options.hx

> index ac315c1ac45..0a4bbdb8eb9 100644

> --- a/qemu-options.hx

> +++ b/qemu-options.hx

> @@ -139,7 +139,8 @@ irqchip completely is not recommended except for debugging purposes.

>  @item kvm-shadow-mem=size

>  Defines the size of the KVM shadow MMU.

>  @item tb-size=@var{n}

> -Controls the size (in MiB) of the TCG translation block cache.

> +Controls the size (in MiB) of the TCG translation block cache. It has no effect on

> +32 bit linux-user binaries.

linux-user doesn't have a tb-size option, so this hunk could be dropped.

>  @item thread=single|multi

>  Controls number of TCG threads. When the TCG is multi-threaded there will be one

>  thread per vCPU therefor taking advantage of additional host cores. The default
Richard Henderson Feb. 26, 2020, 4:36 p.m. | #2
On 2/26/20 7:27 AM, Alex Bennée wrote:
> Basing the TB cache size on the ram_size was always a little heuristic

> and was broken by a1b18df9a4 which caused ram_size not to be fully

> realised at the time we initialise the TCG translation cache.

> 

> At the same time the default code generation size seems mainly set to

> deal with the fact we use a static code buffer for CONFIG_USER to

> avoid mmap allocation problems on constrained systems. So we:

> 

>   - only use a static code buffer on 32 bit systems

>   - up the default buffer size for bigger systems

>   - ignore the ram_size and just go with the default

>   - document the fact tb-size is ignored for 32 bit linux-user

> 

> The could potentially slow down softmmu emulation on 32 bit systems

> with lots (3gb?) of spare memory. Those users can still manually up

> the tb-size via the command line if they do in fact exist.

> 

> Fixes: a1b18df9a4

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Cc: Niek Linnenbank <nieklinnenbank@gmail.com>

> Cc: Igor Mammedov <imammedo@redhat.com>

> ---

...
> -#if defined(CONFIG_USER_ONLY)

> -/* Currently it is not recommended to allocate big chunks of data in

> -   user mode. It will change when a dedicated libc will be used.  */

> -/* ??? 64-bit hosts ought to have no problem mmaping data outside the

> -   region in which the guest needs to run.  Revisit this.  */

> +#if defined(CONFIG_USER_ONLY) && TCG_TARGET_REG_BITS == 32

> +/*

> + * For user mode on smaller 32 bit systems we may run into trouble

> + * allocating big chunks of data in the right place. On these systems

> + * we utilise a static code generation buffer directly in the binary.

> + */

>  #define USE_STATIC_CODE_GEN_BUFFER

>  #endif

>  

> @@ -927,7 +928,11 @@ static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,

>  # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)

>  #endif

>  

> +#if TCG_TARGET_REG_BITS == 32

>  #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)

> +#else

> +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (2ul * 1024 * 1024 * 1024)

> +#endif


As Igor notes, there is no -tb-size X or -accel tcg,tb-size=Y option for
linux-user.  Therefore I'd prefer this patch merely fix the default for softmmu
and not change linux-user at all.

I agree it is somewhat silly to use the static code gen buffer on a 64-bit
host, but let's leave that to a separate patch.


r~

Patch

diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index a08ab11f657..cdfa2db7c56 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -891,11 +891,12 @@  static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
     }
 }
 
-#if defined(CONFIG_USER_ONLY)
-/* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used.  */
-/* ??? 64-bit hosts ought to have no problem mmaping data outside the
-   region in which the guest needs to run.  Revisit this.  */
+#if defined(CONFIG_USER_ONLY) && TCG_TARGET_REG_BITS == 32
+/*
+ * For user mode on smaller 32 bit systems we may run into trouble
+ * allocating big chunks of data in the right place. On these systems
+ * we utilise a static code generation buffer directly in the binary.
+ */
 #define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
@@ -927,7 +928,11 @@  static void page_lock_pair(PageDesc **ret_p1, tb_page_addr_t phys1,
 # define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
 #endif
 
+#if TCG_TARGET_REG_BITS == 32
 #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+#else
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (2ul * 1024 * 1024 * 1024)
+#endif
 
 #define DEFAULT_CODE_GEN_BUFFER_SIZE \
   (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
@@ -937,15 +942,7 @@  static inline size_t size_code_gen_buffer(size_t tb_size)
 {
     /* Size the buffer.  */
     if (tb_size == 0) {
-#ifdef USE_STATIC_CODE_GEN_BUFFER
         tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-#else
-        /* ??? Needs adjustments.  */
-        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
-           static buffer, we could size this on RESERVED_VA, on the text
-           segment size of the executable, or continue to use the default.  */
-        tb_size = (unsigned long)(ram_size / 4);
-#endif
     }
     if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
         tb_size = MIN_CODE_GEN_BUFFER_SIZE;
diff --git a/qemu-options.hx b/qemu-options.hx
index ac315c1ac45..0a4bbdb8eb9 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -139,7 +139,8 @@  irqchip completely is not recommended except for debugging purposes.
 @item kvm-shadow-mem=size
 Defines the size of the KVM shadow MMU.
 @item tb-size=@var{n}
-Controls the size (in MiB) of the TCG translation block cache.
+Controls the size (in MiB) of the TCG translation block cache. It has no effect on
+32 bit linux-user binaries.
 @item thread=single|multi
 Controls number of TCG threads. When the TCG is multi-threaded there will be one
 thread per vCPU therefor taking advantage of additional host cores. The default