diff mbox series

[v3,10/10] accel/tcg: include cs_base in our hash calculations

Message ID 20230505155336.137393-11-alex.bennee@linaro.org
State New
Headers show
Series tracing: remove dynamic vcpu state | expand

Commit Message

Alex Bennée May 5, 2023, 3:53 p.m. UTC
We weren't using cs_base in the hash calculations before. Since the
arm front end moved a chunk of flags in a378206a20 (target/arm: Move
mode specific TB flags to tb->cs_base) they comprise of an important
part of the execution state.

Widen the tb_hash_func to include cs_base and expand to qemu_xxhash8()
to accommodate it.

My initial benchmark run on armhf shows very little difference in the
runtime.

Before:

  Time (mean ± σ):     24.440 s ±  2.885 s    [User: 34.474 s, System: 2.028 s]
  Range (min … max):   21.663 s … 29.937 s    20 runs

After:

  Time (mean ± σ):     24.348 s ±  2.717 s    [User: 34.668 s, System: 1.859 s]
  Range (min … max):   21.830 s … 30.093 s    20 runs

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
---
 accel/tcg/tb-hash.h   |  5 +++--
 include/qemu/xxhash.h | 21 +++++++++++++++------
 accel/tcg/cpu-exec.c  |  2 +-
 accel/tcg/tb-maint.c  |  4 ++--
 4 files changed, 21 insertions(+), 11 deletions(-)

Comments

Richard Henderson May 5, 2023, 6:27 p.m. UTC | #1
On 5/5/23 16:53, Alex Bennée wrote:
> +                      uint32_t flags, uint64_t flags2, uint32_t cf_mask)
>   {
> -    return qemu_xxhash6(phys_pc, pc, flags, cf_mask);
> +    return qemu_xxhash8(phys_pc, pc, flags,
> +                        flags2 & 0xffff, flags2 >> 32, cf_mask);

Well not that mask, obviously.  Either pass it whole, like pc, or just let the compiler 
truncate for you without the mask.


r~
diff mbox series

Patch

diff --git a/accel/tcg/tb-hash.h b/accel/tcg/tb-hash.h
index 1d19c69caa..8cbafb5494 100644
--- a/accel/tcg/tb-hash.h
+++ b/accel/tcg/tb-hash.h
@@ -62,9 +62,10 @@  static inline unsigned int tb_jmp_cache_hash_func(target_ulong pc)
 
 static inline
 uint32_t tb_hash_func(tb_page_addr_t phys_pc, target_ulong pc,
-                      uint32_t flags, uint32_t cf_mask)
+                      uint32_t flags, uint64_t flags2, uint32_t cf_mask)
 {
-    return qemu_xxhash6(phys_pc, pc, flags, cf_mask);
+    return qemu_xxhash8(phys_pc, pc, flags,
+                        flags2 & 0xffff, flags2 >> 32, cf_mask);
 }
 
 #endif
diff --git a/include/qemu/xxhash.h b/include/qemu/xxhash.h
index c2dcccadbf..cd7054f07e 100644
--- a/include/qemu/xxhash.h
+++ b/include/qemu/xxhash.h
@@ -48,8 +48,8 @@ 
  * xxhash32, customized for input variables that are not guaranteed to be
  * contiguous in memory.
  */
-static inline uint32_t
-qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
+static inline uint32_t qemu_xxhash8(uint64_t ab, uint64_t cd, uint32_t e,
+                                    uint32_t f, uint32_t g, uint32_t h)
 {
     uint32_t v1 = QEMU_XXHASH_SEED + PRIME32_1 + PRIME32_2;
     uint32_t v2 = QEMU_XXHASH_SEED + PRIME32_2;
@@ -89,6 +89,9 @@  qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
     h32 += g * PRIME32_3;
     h32  = rol32(h32, 17) * PRIME32_4;
 
+    h32 += h * PRIME32_3;
+    h32  = rol32(h32, 17) * PRIME32_4;
+
     h32 ^= h32 >> 15;
     h32 *= PRIME32_2;
     h32 ^= h32 >> 13;
@@ -100,23 +103,29 @@  qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e, uint32_t f, uint32_t g)
 
 static inline uint32_t qemu_xxhash2(uint64_t ab)
 {
-    return qemu_xxhash7(ab, 0, 0, 0, 0);
+    return qemu_xxhash8(ab, 0, 0, 0, 0, 0);
 }
 
 static inline uint32_t qemu_xxhash4(uint64_t ab, uint64_t cd)
 {
-    return qemu_xxhash7(ab, cd, 0, 0, 0);
+    return qemu_xxhash8(ab, cd, 0, 0, 0, 0);
 }
 
 static inline uint32_t qemu_xxhash5(uint64_t ab, uint64_t cd, uint32_t e)
 {
-    return qemu_xxhash7(ab, cd, e, 0, 0);
+    return qemu_xxhash8(ab, cd, e, 0, 0, 0);
 }
 
 static inline uint32_t qemu_xxhash6(uint64_t ab, uint64_t cd, uint32_t e,
                                     uint32_t f)
 {
-    return qemu_xxhash7(ab, cd, e, f, 0);
+    return qemu_xxhash8(ab, cd, e, f, 0, 0);
+}
+
+static inline uint32_t qemu_xxhash7(uint64_t ab, uint64_t cd, uint32_t e,
+                                    uint32_t f, uint32_t g)
+{
+    return qemu_xxhash8(ab, cd, e, f, g, 0);
 }
 
 /*
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 973da2a434..8d67198e56 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -233,7 +233,7 @@  static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
     }
     desc.page_addr0 = phys_pc;
     h = tb_hash_func(phys_pc, (cflags & CF_PCREL ? 0 : pc),
-                     flags, cflags);
+                     flags, cs_base, cflags);
     return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
 }
 
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 432a0cffdb..52d17815e4 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -887,7 +887,7 @@  static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     /* remove the TB from the hash list */
     phys_pc = tb_page_addr0(tb);
     h = tb_hash_func(phys_pc, (orig_cflags & CF_PCREL ? 0 : tb->pc),
-                     tb->flags, orig_cflags);
+                     tb->flags, tb->cs_base, orig_cflags);
     if (!qht_remove(&tb_ctx.htable, tb, h)) {
         return;
     }
@@ -968,7 +968,7 @@  TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
 
     /* add in the hash table */
     h = tb_hash_func(phys_pc, (tb->cflags & CF_PCREL ? 0 : tb->pc),
-                     tb->flags, tb->cflags);
+                     tb->flags, tb->cs_base, tb->cflags);
     qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
 
     /* remove TB from the page(s) if we couldn't insert it */