[v3,09/20] cputlb: Replace switches in load/store_helper with callback

Message ID 20190922035458.14879-10-richard.henderson@linaro.org
State New
Headers show
Series
  • Move rom and notdirty handling to cputlb
Related show

Commit Message

Richard Henderson Sept. 22, 2019, 3:54 a.m.
Add a function parameter to perform the actual load/store to ram.
With optimization, this results in identical code.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 accel/tcg/cputlb.c | 159 +++++++++++++++++++++++----------------------
 1 file changed, 83 insertions(+), 76 deletions(-)

-- 
2.17.1

Comments

David Hildenbrand Sept. 23, 2019, 8:32 a.m. | #1
On 22.09.19 05:54, Richard Henderson wrote:
> Add a function parameter to perform the actual load/store to ram.

> With optimization, this results in identical code.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  accel/tcg/cputlb.c | 159 +++++++++++++++++++++++----------------------

>  1 file changed, 83 insertions(+), 76 deletions(-)

> 

> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

> index 2222b87764..b4a63d3928 100644

> --- a/accel/tcg/cputlb.c

> +++ b/accel/tcg/cputlb.c

> @@ -1280,11 +1280,38 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,

>  

>  typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,

>                                  TCGMemOpIdx oi, uintptr_t retaddr);

> +typedef uint64_t LoadHelper(const void *);

> +

> +/* Wrap the unaligned load helpers to that they have a common signature.  */

> +static inline uint64_t wrap_ldub(const void *haddr)

> +{

> +    return ldub_p(haddr);

> +}

> +

> +static inline uint64_t wrap_lduw_be(const void *haddr)

> +{

> +    return lduw_be_p(haddr);

> +}

> +

> +static inline uint64_t wrap_lduw_le(const void *haddr)

> +{

> +    return lduw_le_p(haddr);

> +}

> +

> +static inline uint64_t wrap_ldul_be(const void *haddr)

> +{

> +    return (uint32_t)ldl_be_p(haddr);

> +}

> +

> +static inline uint64_t wrap_ldul_le(const void *haddr)

> +{

> +    return (uint32_t)ldl_le_p(haddr);

> +}

>  

>  static inline uint64_t QEMU_ALWAYS_INLINE

>  load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,

>              uintptr_t retaddr, MemOp op, bool code_read,

> -            FullLoadHelper *full_load)

> +            FullLoadHelper *full_load, LoadHelper *direct)

>  {

>      uintptr_t mmu_idx = get_mmuidx(oi);

>      uintptr_t index = tlb_index(env, mmu_idx, addr);

> @@ -1373,33 +1400,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,

>  

>   do_aligned_access:

>      haddr = (void *)((uintptr_t)addr + entry->addend);

> -    switch (op) {

> -    case MO_UB:

> -        res = ldub_p(haddr);

> -        break;

> -    case MO_BEUW:

> -        res = lduw_be_p(haddr);

> -        break;

> -    case MO_LEUW:

> -        res = lduw_le_p(haddr);

> -        break;

> -    case MO_BEUL:

> -        res = (uint32_t)ldl_be_p(haddr);

> -        break;

> -    case MO_LEUL:

> -        res = (uint32_t)ldl_le_p(haddr);

> -        break;

> -    case MO_BEQ:

> -        res = ldq_be_p(haddr);

> -        break;

> -    case MO_LEQ:

> -        res = ldq_le_p(haddr);

> -        break;

> -    default:

> -        g_assert_not_reached();

> -    }

> -

> -    return res;

> +    return direct(haddr);

>  }

>  

>  /*

> @@ -1415,7 +1416,8 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,

>  static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,

>                                TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);

> +    return load_helper(env, addr, oi, retaddr, MO_UB, false,

> +                       full_ldub_mmu, wrap_ldub);

>  }

>  

>  tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,

> @@ -1428,7 +1430,7 @@ static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,

>                                   TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEUW, false,

> -                       full_le_lduw_mmu);

> +                       full_le_lduw_mmu, wrap_lduw_le);

>  }

>  

>  tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,

> @@ -1441,7 +1443,7 @@ static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,

>                                   TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEUW, false,

> -                       full_be_lduw_mmu);

> +                       full_be_lduw_mmu, wrap_lduw_be);

>  }

>  

>  tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,

> @@ -1454,7 +1456,7 @@ static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,

>                                   TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEUL, false,

> -                       full_le_ldul_mmu);

> +                       full_le_ldul_mmu, wrap_ldul_le);

>  }

>  

>  tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,

> @@ -1467,7 +1469,7 @@ static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,

>                                   TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEUL, false,

> -                       full_be_ldul_mmu);

> +                       full_be_ldul_mmu, wrap_ldul_be);

>  }

>  

>  tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,

> @@ -1480,14 +1482,14 @@ uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,

>                             TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEQ, false,

> -                       helper_le_ldq_mmu);

> +                       helper_le_ldq_mmu, ldq_le_p);

>  }

>  

>  uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,

>                             TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEQ, false,

> -                       helper_be_ldq_mmu);

> +                       helper_be_ldq_mmu, ldq_be_p);

>  }

>  

>  /*

> @@ -1530,9 +1532,38 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,

>   * Store Helpers

>   */

>  

> +typedef void StoreHelper(void *, uint64_t);

> +

> +/* Wrap the unaligned store helpers to that they have a common signature.  */

> +static inline void wrap_stb(void *haddr, uint64_t val)

> +{

> +    stb_p(haddr, val);

> +}

> +

> +static inline void wrap_stw_be(void *haddr, uint64_t val)

> +{

> +    stw_be_p(haddr, val);

> +}

> +

> +static inline void wrap_stw_le(void *haddr, uint64_t val)

> +{

> +    stw_le_p(haddr, val);

> +}

> +

> +static inline void wrap_stl_be(void *haddr, uint64_t val)

> +{

> +    stl_be_p(haddr, val);

> +}

> +

> +static inline void wrap_stl_le(void *haddr, uint64_t val)

> +{

> +    stl_le_p(haddr, val);

> +}

> +

>  static inline void QEMU_ALWAYS_INLINE

>  store_helper(CPUArchState *env, target_ulong addr, uint64_t val,

> -             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)

> +             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op,

> +             StoreHelper *direct)

>  {

>      uintptr_t mmu_idx = get_mmuidx(oi);

>      uintptr_t index = tlb_index(env, mmu_idx, addr);

> @@ -1657,74 +1688,49 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,

>  

>   do_aligned_access:

>      haddr = (void *)((uintptr_t)addr + entry->addend);

> -    switch (op) {

> -    case MO_UB:

> -        stb_p(haddr, val);

> -        break;

> -    case MO_BEUW:

> -        stw_be_p(haddr, val);

> -        break;

> -    case MO_LEUW:

> -        stw_le_p(haddr, val);

> -        break;

> -    case MO_BEUL:

> -        stl_be_p(haddr, val);

> -        break;

> -    case MO_LEUL:

> -        stl_le_p(haddr, val);

> -        break;

> -    case MO_BEQ:

> -        stq_be_p(haddr, val);

> -        break;

> -    case MO_LEQ:

> -        stq_le_p(haddr, val);

> -        break;

> -    default:

> -        g_assert_not_reached();

> -        break;

> -    }

> +    direct(haddr, val);

>  }

>  

>  void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,

>                          TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_UB);

> +    store_helper(env, addr, val, oi, retaddr, MO_UB, wrap_stb);

>  }

>  

>  void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_LEUW);

> +    store_helper(env, addr, val, oi, retaddr, MO_LEUW, wrap_stw_le);

>  }

>  

>  void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_BEUW);

> +    store_helper(env, addr, val, oi, retaddr, MO_BEUW, wrap_stw_be);

>  }

>  

>  void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_LEUL);

> +    store_helper(env, addr, val, oi, retaddr, MO_LEUL, wrap_stl_le);

>  }

>  

>  void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_BEUL);

> +    store_helper(env, addr, val, oi, retaddr, MO_BEUL, wrap_stl_be);

>  }

>  

>  void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_LEQ);

> +    store_helper(env, addr, val, oi, retaddr, MO_LEQ, stq_le_p);

>  }

>  

>  void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_BEQ);

> +    store_helper(env, addr, val, oi, retaddr, MO_BEQ, stq_be_p);

>  }

>  

>  /* First set of helpers allows passing in of OI and RETADDR.  This makes

> @@ -1789,7 +1795,8 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,

>  static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,

>                                 TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);

> +    return load_helper(env, addr, oi, retaddr, MO_8, true,

> +                       full_ldub_cmmu, wrap_ldub);

>  }

>  

>  uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1802,7 +1809,7 @@ static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,

>                                    TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEUW, true,

> -                       full_le_lduw_cmmu);

> +                       full_le_lduw_cmmu, wrap_lduw_le);

>  }

>  

>  uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1815,7 +1822,7 @@ static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,

>                                    TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEUW, true,

> -                       full_be_lduw_cmmu);

> +                       full_be_lduw_cmmu, wrap_lduw_be);

>  }

>  

>  uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1828,7 +1835,7 @@ static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,

>                                    TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEUL, true,

> -                       full_le_ldul_cmmu);

> +                       full_le_ldul_cmmu, wrap_ldul_le);

>  }

>  

>  uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1841,7 +1848,7 @@ static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,

>                                    TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEUL, true,

> -                       full_be_ldul_cmmu);

> +                       full_be_ldul_cmmu, wrap_ldul_be);

>  }

>  

>  uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1854,12 +1861,12 @@ uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,

>                              TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEQ, true,

> -                       helper_le_ldq_cmmu);

> +                       helper_le_ldq_cmmu, ldq_le_p);

>  }

>  

>  uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,

>                              TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEQ, true,

> -                       helper_be_ldq_cmmu);

> +                       helper_be_ldq_cmmu, ldq_be_p);

>  }

> 


Reviewed-by: David Hildenbrand <david@redhat.com>


-- 

Thanks,

David / dhildenb
Philippe Mathieu-Daudé Sept. 23, 2019, 9:27 a.m. | #2
On 9/22/19 5:54 AM, Richard Henderson wrote:
> Add a function parameter to perform the actual load/store to ram.

> With optimization, this results in identical code.

> 

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>  accel/tcg/cputlb.c | 159 +++++++++++++++++++++++----------------------

>  1 file changed, 83 insertions(+), 76 deletions(-)

> 

> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

> index 2222b87764..b4a63d3928 100644

> --- a/accel/tcg/cputlb.c

> +++ b/accel/tcg/cputlb.c

> @@ -1280,11 +1280,38 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,

>  

>  typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,

>                                  TCGMemOpIdx oi, uintptr_t retaddr);

> +typedef uint64_t LoadHelper(const void *);

> +

> +/* Wrap the unaligned load helpers to that they have a common signature.  */

> +static inline uint64_t wrap_ldub(const void *haddr)

> +{

> +    return ldub_p(haddr);

> +}

> +

> +static inline uint64_t wrap_lduw_be(const void *haddr)

> +{

> +    return lduw_be_p(haddr);

> +}

> +

> +static inline uint64_t wrap_lduw_le(const void *haddr)

> +{

> +    return lduw_le_p(haddr);

> +}

> +

> +static inline uint64_t wrap_ldul_be(const void *haddr)

> +{

> +    return (uint32_t)ldl_be_p(haddr);


Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>


> +}

> +

> +static inline uint64_t wrap_ldul_le(const void *haddr)

> +{

> +    return (uint32_t)ldl_le_p(haddr);

> +}

>  

>  static inline uint64_t QEMU_ALWAYS_INLINE

>  load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,

>              uintptr_t retaddr, MemOp op, bool code_read,

> -            FullLoadHelper *full_load)

> +            FullLoadHelper *full_load, LoadHelper *direct)

>  {

>      uintptr_t mmu_idx = get_mmuidx(oi);

>      uintptr_t index = tlb_index(env, mmu_idx, addr);

> @@ -1373,33 +1400,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,

>  

>   do_aligned_access:

>      haddr = (void *)((uintptr_t)addr + entry->addend);

> -    switch (op) {

> -    case MO_UB:

> -        res = ldub_p(haddr);

> -        break;

> -    case MO_BEUW:

> -        res = lduw_be_p(haddr);

> -        break;

> -    case MO_LEUW:

> -        res = lduw_le_p(haddr);

> -        break;

> -    case MO_BEUL:

> -        res = (uint32_t)ldl_be_p(haddr);

> -        break;

> -    case MO_LEUL:

> -        res = (uint32_t)ldl_le_p(haddr);

> -        break;

> -    case MO_BEQ:

> -        res = ldq_be_p(haddr);

> -        break;

> -    case MO_LEQ:

> -        res = ldq_le_p(haddr);

> -        break;

> -    default:

> -        g_assert_not_reached();

> -    }

> -

> -    return res;

> +    return direct(haddr);

>  }

>  

>  /*

> @@ -1415,7 +1416,8 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,

>  static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,

>                                TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);

> +    return load_helper(env, addr, oi, retaddr, MO_UB, false,

> +                       full_ldub_mmu, wrap_ldub);

>  }

>  

>  tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,

> @@ -1428,7 +1430,7 @@ static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,

>                                   TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEUW, false,

> -                       full_le_lduw_mmu);

> +                       full_le_lduw_mmu, wrap_lduw_le);

>  }

>  

>  tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,

> @@ -1441,7 +1443,7 @@ static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,

>                                   TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEUW, false,

> -                       full_be_lduw_mmu);

> +                       full_be_lduw_mmu, wrap_lduw_be);

>  }

>  

>  tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,

> @@ -1454,7 +1456,7 @@ static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,

>                                   TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEUL, false,

> -                       full_le_ldul_mmu);

> +                       full_le_ldul_mmu, wrap_ldul_le);

>  }

>  

>  tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,

> @@ -1467,7 +1469,7 @@ static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,

>                                   TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEUL, false,

> -                       full_be_ldul_mmu);

> +                       full_be_ldul_mmu, wrap_ldul_be);

>  }

>  

>  tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,

> @@ -1480,14 +1482,14 @@ uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,

>                             TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEQ, false,

> -                       helper_le_ldq_mmu);

> +                       helper_le_ldq_mmu, ldq_le_p);

>  }

>  

>  uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,

>                             TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEQ, false,

> -                       helper_be_ldq_mmu);

> +                       helper_be_ldq_mmu, ldq_be_p);

>  }

>  

>  /*

> @@ -1530,9 +1532,38 @@ tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,

>   * Store Helpers

>   */

>  

> +typedef void StoreHelper(void *, uint64_t);

> +

> +/* Wrap the unaligned store helpers to that they have a common signature.  */

> +static inline void wrap_stb(void *haddr, uint64_t val)

> +{

> +    stb_p(haddr, val);

> +}

> +

> +static inline void wrap_stw_be(void *haddr, uint64_t val)

> +{

> +    stw_be_p(haddr, val);

> +}

> +

> +static inline void wrap_stw_le(void *haddr, uint64_t val)

> +{

> +    stw_le_p(haddr, val);

> +}

> +

> +static inline void wrap_stl_be(void *haddr, uint64_t val)

> +{

> +    stl_be_p(haddr, val);

> +}

> +

> +static inline void wrap_stl_le(void *haddr, uint64_t val)

> +{

> +    stl_le_p(haddr, val);

> +}

> +

>  static inline void QEMU_ALWAYS_INLINE

>  store_helper(CPUArchState *env, target_ulong addr, uint64_t val,

> -             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)

> +             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op,

> +             StoreHelper *direct)

>  {

>      uintptr_t mmu_idx = get_mmuidx(oi);

>      uintptr_t index = tlb_index(env, mmu_idx, addr);

> @@ -1657,74 +1688,49 @@ store_helper(CPUArchState *env, target_ulong addr, uint64_t val,

>  

>   do_aligned_access:

>      haddr = (void *)((uintptr_t)addr + entry->addend);

> -    switch (op) {

> -    case MO_UB:

> -        stb_p(haddr, val);

> -        break;

> -    case MO_BEUW:

> -        stw_be_p(haddr, val);

> -        break;

> -    case MO_LEUW:

> -        stw_le_p(haddr, val);

> -        break;

> -    case MO_BEUL:

> -        stl_be_p(haddr, val);

> -        break;

> -    case MO_LEUL:

> -        stl_le_p(haddr, val);

> -        break;

> -    case MO_BEQ:

> -        stq_be_p(haddr, val);

> -        break;

> -    case MO_LEQ:

> -        stq_le_p(haddr, val);

> -        break;

> -    default:

> -        g_assert_not_reached();

> -        break;

> -    }

> +    direct(haddr, val);

>  }

>  

>  void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,

>                          TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_UB);

> +    store_helper(env, addr, val, oi, retaddr, MO_UB, wrap_stb);

>  }

>  

>  void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_LEUW);

> +    store_helper(env, addr, val, oi, retaddr, MO_LEUW, wrap_stw_le);

>  }

>  

>  void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_BEUW);

> +    store_helper(env, addr, val, oi, retaddr, MO_BEUW, wrap_stw_be);

>  }

>  

>  void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_LEUL);

> +    store_helper(env, addr, val, oi, retaddr, MO_LEUL, wrap_stl_le);

>  }

>  

>  void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_BEUL);

> +    store_helper(env, addr, val, oi, retaddr, MO_BEUL, wrap_stl_be);

>  }

>  

>  void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_LEQ);

> +    store_helper(env, addr, val, oi, retaddr, MO_LEQ, stq_le_p);

>  }

>  

>  void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,

>                         TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    store_helper(env, addr, val, oi, retaddr, MO_BEQ);

> +    store_helper(env, addr, val, oi, retaddr, MO_BEQ, stq_be_p);

>  }

>  

>  /* First set of helpers allows passing in of OI and RETADDR.  This makes

> @@ -1789,7 +1795,8 @@ void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,

>  static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,

>                                 TCGMemOpIdx oi, uintptr_t retaddr)

>  {

> -    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);

> +    return load_helper(env, addr, oi, retaddr, MO_8, true,

> +                       full_ldub_cmmu, wrap_ldub);

>  }

>  

>  uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1802,7 +1809,7 @@ static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,

>                                    TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEUW, true,

> -                       full_le_lduw_cmmu);

> +                       full_le_lduw_cmmu, wrap_lduw_le);

>  }

>  

>  uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1815,7 +1822,7 @@ static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,

>                                    TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEUW, true,

> -                       full_be_lduw_cmmu);

> +                       full_be_lduw_cmmu, wrap_lduw_be);

>  }

>  

>  uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1828,7 +1835,7 @@ static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,

>                                    TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEUL, true,

> -                       full_le_ldul_cmmu);

> +                       full_le_ldul_cmmu, wrap_ldul_le);

>  }

>  

>  uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1841,7 +1848,7 @@ static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,

>                                    TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEUL, true,

> -                       full_be_ldul_cmmu);

> +                       full_be_ldul_cmmu, wrap_ldul_be);

>  }

>  

>  uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,

> @@ -1854,12 +1861,12 @@ uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,

>                              TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_LEQ, true,

> -                       helper_le_ldq_cmmu);

> +                       helper_le_ldq_cmmu, ldq_le_p);

>  }

>  

>  uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,

>                              TCGMemOpIdx oi, uintptr_t retaddr)

>  {

>      return load_helper(env, addr, oi, retaddr, MO_BEQ, true,

> -                       helper_be_ldq_cmmu);

> +                       helper_be_ldq_cmmu, ldq_be_p);

>  }

>
Paolo Bonzini Sept. 23, 2019, 9:51 a.m. | #3
On 22/09/19 05:54, Richard Henderson wrote:
> +/* Wrap the unaligned load helpers to that they have a common signature.  */

> +static inline uint64_t wrap_ldub(const void *haddr)

> +{

> +    return ldub_p(haddr);

> +}

> +

> +static inline uint64_t wrap_lduw_be(const void *haddr)

> +{

> +    return lduw_be_p(haddr);

> +}

> +

> +static inline uint64_t wrap_lduw_le(const void *haddr)

> +{

> +    return lduw_le_p(haddr);

> +}

> +

> +static inline uint64_t wrap_ldul_be(const void *haddr)

> +{

> +    return (uint32_t)ldl_be_p(haddr);

> +}

> +

> +static inline uint64_t wrap_ldul_le(const void *haddr)

> +{

> +    return (uint32_t)ldl_le_p(haddr);

> +}


Any reason to have these five functions (plus five for stores) instead
of a pair

static uint64_t ld_memop(const void *haddr, MemOp op)
{
}

static uint64_t st_memop(void *haddr, MemOp op, uint64_t val)
{
}

that includes the switches?  Everything should be inlined just the same
if you do

        if (unlikely(tlb_addr & TLB_BSWAP)) {
            st_memop(haddr, op ^ MO_BSWAP, val);
        } else {
            st_memop(haddr, op, val);
        }

and the like.

Paolo

>  static inline uint64_t QEMU_ALWAYS_INLINE

>  load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,

>              uintptr_t retaddr, MemOp op, bool code_read,

> -            FullLoadHelper *full_load)

> +            FullLoadHelper *full_load, LoadHelper *direct)

>  {

>      uintptr_t mmu_idx = get_mmuidx(oi);

>      uintptr_t index = tlb_index(env, mmu_idx, addr);

> @@ -1373,33 +1400,7 @@ load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,

>  

>   do_aligned_access:

>      haddr = (void *)((uintptr_t)addr + entry->addend);

> -    switch (op) {

> -    case MO_UB:

> -        res = ldub_p(haddr);

> -        break;

> -    case MO_BEUW:

> -        res = lduw_be_p(haddr);

> -        break;

> -    case MO_LEUW:

> -        res = lduw_le_p(haddr);

> -        break;

> -    case MO_BEUL:

> -        res = (uint32_t)ldl_be_p(haddr);

> -        break;

> -    case MO_LEUL:

> -        res = (uint32_t)ldl_le_p(haddr);

> -        break;

> -    case MO_BEQ:

> -        res = ldq_be_p(haddr);

> -        break;

> -    case MO_LEQ:

> -        res = ldq_le_p(haddr);

> -        break;

> -    default:

> -        g_assert_not_reached();

> -    }

> -

> -    return res;

> +    return direct(haddr);
David Hildenbrand Sept. 23, 2019, 9:54 a.m. | #4
On 23.09.19 11:51, Paolo Bonzini wrote:
> On 22/09/19 05:54, Richard Henderson wrote:

>> +/* Wrap the unaligned load helpers to that they have a common signature.  */

>> +static inline uint64_t wrap_ldub(const void *haddr)

>> +{

>> +    return ldub_p(haddr);

>> +}

>> +

>> +static inline uint64_t wrap_lduw_be(const void *haddr)

>> +{

>> +    return lduw_be_p(haddr);

>> +}

>> +

>> +static inline uint64_t wrap_lduw_le(const void *haddr)

>> +{

>> +    return lduw_le_p(haddr);

>> +}

>> +

>> +static inline uint64_t wrap_ldul_be(const void *haddr)

>> +{

>> +    return (uint32_t)ldl_be_p(haddr);

>> +}

>> +

>> +static inline uint64_t wrap_ldul_le(const void *haddr)

>> +{

>> +    return (uint32_t)ldl_le_p(haddr);

>> +}

> 

> Any reason to have these five functions (plus five for stores) instead

> of a pair

> 

> static uint64_t ld_memop(const void *haddr, MemOp op)

> {

> }

> 

> static uint64_t st_memop(void *haddr, MemOp op, uint64_t val)

> {

> }

> 

> that includes the switches?  Everything should be inlined just the same

> if you do

> 

>         if (unlikely(tlb_addr & TLB_BSWAP)) {

>             st_memop(haddr, op ^ MO_BSWAP, val);

>         } else {

>             st_memop(haddr, op, val);

>         }


I asked the same question on v2 and Richard explained that - for
whatever reason -  the compiler will not properly propagate the constant
in the "op ^ MO_BSWAP" case.

> 

> and the like.

> 

> Paolo



-- 

Thanks,

David / dhildenb
Paolo Bonzini Sept. 23, 2019, 10:02 a.m. | #5
On 23/09/19 11:54, David Hildenbrand wrote:
> On 23.09.19 11:51, Paolo Bonzini wrote:

>> On 22/09/19 05:54, Richard Henderson wrote:

>>> +/* Wrap the unaligned load helpers to that they have a common signature.  */

>>> +static inline uint64_t wrap_ldub(const void *haddr)

>>> +{

>>> +    return ldub_p(haddr);

>>> +}

>>> +

>>> +static inline uint64_t wrap_lduw_be(const void *haddr)

>>> +{

>>> +    return lduw_be_p(haddr);

>>> +}

>>> +

>>> +static inline uint64_t wrap_lduw_le(const void *haddr)

>>> +{

>>> +    return lduw_le_p(haddr);

>>> +}

>>> +

>>> +static inline uint64_t wrap_ldul_be(const void *haddr)

>>> +{

>>> +    return (uint32_t)ldl_be_p(haddr);

>>> +}

>>> +

>>> +static inline uint64_t wrap_ldul_le(const void *haddr)

>>> +{

>>> +    return (uint32_t)ldl_le_p(haddr);

>>> +}

>>

>> Any reason to have these five functions (plus five for stores) instead

>> of a pair

>>

>> static uint64_t ld_memop(const void *haddr, MemOp op)

>> {

>> }

>>

>> static uint64_t st_memop(void *haddr, MemOp op, uint64_t val)

>> {

>> }

>>

>> that includes the switches?  Everything should be inlined just the same

>> if you do

>>

>>         if (unlikely(tlb_addr & TLB_BSWAP)) {

>>             st_memop(haddr, op ^ MO_BSWAP, val);

>>         } else {

>>             st_memop(haddr, op, val);

>>         }

> 

> I asked the same question on v2 and Richard explained that - for

> whatever reason -  the compiler will not properly propagate the constant

> in the "op ^ MO_BSWAP" case.


Even if ld_memop and st_memop are __always_inline__?

Paolo
Richard Henderson Sept. 23, 2019, 3:52 p.m. | #6
On 9/23/19 3:02 AM, Paolo Bonzini wrote:
> On 23/09/19 11:54, David Hildenbrand wrote:

>> On 23.09.19 11:51, Paolo Bonzini wrote:

>>> that includes the switches?  Everything should be inlined just the same

>>> if you do

>>>

>>>         if (unlikely(tlb_addr & TLB_BSWAP)) {

>>>             st_memop(haddr, op ^ MO_BSWAP, val);

>>>         } else {

>>>             st_memop(haddr, op, val);

>>>         }

>>

>> I asked the same question on v2 and Richard explained that - for

>> whatever reason -  the compiler will not properly propagate the constant

>> in the "op ^ MO_BSWAP" case.

> 

> Even if ld_memop and st_memop are __always_inline__?


I'm not sure I tried __always_inline__.  I can, if you like.


r~
Richard Henderson Sept. 23, 2019, 6:18 p.m. | #7
On 9/23/19 8:52 AM, Richard Henderson wrote:
> On 9/23/19 3:02 AM, Paolo Bonzini wrote:

>> On 23/09/19 11:54, David Hildenbrand wrote:

>>> On 23.09.19 11:51, Paolo Bonzini wrote:

>>>> that includes the switches?  Everything should be inlined just the same

>>>> if you do

>>>>

>>>>         if (unlikely(tlb_addr & TLB_BSWAP)) {

>>>>             st_memop(haddr, op ^ MO_BSWAP, val);

>>>>         } else {

>>>>             st_memop(haddr, op, val);

>>>>         }

>>>

>>> I asked the same question on v2 and Richard explained that - for

>>> whatever reason -  the compiler will not properly propagate the constant

>>> in the "op ^ MO_BSWAP" case.

>>

>> Even if ld_memop and st_memop are __always_inline__?

> 

> I'm not sure I tried __always_inline__.  I can, if you like.


FWIW, that works.

Since two of you have now asked about this, I'll change the patch.


r~

Patch

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 2222b87764..b4a63d3928 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1280,11 +1280,38 @@  static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr,
 
 typedef uint64_t FullLoadHelper(CPUArchState *env, target_ulong addr,
                                 TCGMemOpIdx oi, uintptr_t retaddr);
+typedef uint64_t LoadHelper(const void *);
+
+/* Wrap the unaligned load helpers to that they have a common signature.  */
+static inline uint64_t wrap_ldub(const void *haddr)
+{
+    return ldub_p(haddr);
+}
+
+static inline uint64_t wrap_lduw_be(const void *haddr)
+{
+    return lduw_be_p(haddr);
+}
+
+static inline uint64_t wrap_lduw_le(const void *haddr)
+{
+    return lduw_le_p(haddr);
+}
+
+static inline uint64_t wrap_ldul_be(const void *haddr)
+{
+    return (uint32_t)ldl_be_p(haddr);
+}
+
+static inline uint64_t wrap_ldul_le(const void *haddr)
+{
+    return (uint32_t)ldl_le_p(haddr);
+}
 
 static inline uint64_t QEMU_ALWAYS_INLINE
 load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
             uintptr_t retaddr, MemOp op, bool code_read,
-            FullLoadHelper *full_load)
+            FullLoadHelper *full_load, LoadHelper *direct)
 {
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
@@ -1373,33 +1400,7 @@  load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
 
  do_aligned_access:
     haddr = (void *)((uintptr_t)addr + entry->addend);
-    switch (op) {
-    case MO_UB:
-        res = ldub_p(haddr);
-        break;
-    case MO_BEUW:
-        res = lduw_be_p(haddr);
-        break;
-    case MO_LEUW:
-        res = lduw_le_p(haddr);
-        break;
-    case MO_BEUL:
-        res = (uint32_t)ldl_be_p(haddr);
-        break;
-    case MO_LEUL:
-        res = (uint32_t)ldl_le_p(haddr);
-        break;
-    case MO_BEQ:
-        res = ldq_be_p(haddr);
-        break;
-    case MO_LEQ:
-        res = ldq_le_p(haddr);
-        break;
-    default:
-        g_assert_not_reached();
-    }
-
-    return res;
+    return direct(haddr);
 }
 
 /*
@@ -1415,7 +1416,8 @@  load_helper(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi,
 static uint64_t full_ldub_mmu(CPUArchState *env, target_ulong addr,
                               TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_UB, false, full_ldub_mmu);
+    return load_helper(env, addr, oi, retaddr, MO_UB, false,
+                       full_ldub_mmu, wrap_ldub);
 }
 
 tcg_target_ulong helper_ret_ldub_mmu(CPUArchState *env, target_ulong addr,
@@ -1428,7 +1430,7 @@  static uint64_t full_le_lduw_mmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_LEUW, false,
-                       full_le_lduw_mmu);
+                       full_le_lduw_mmu, wrap_lduw_le);
 }
 
 tcg_target_ulong helper_le_lduw_mmu(CPUArchState *env, target_ulong addr,
@@ -1441,7 +1443,7 @@  static uint64_t full_be_lduw_mmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_BEUW, false,
-                       full_be_lduw_mmu);
+                       full_be_lduw_mmu, wrap_lduw_be);
 }
 
 tcg_target_ulong helper_be_lduw_mmu(CPUArchState *env, target_ulong addr,
@@ -1454,7 +1456,7 @@  static uint64_t full_le_ldul_mmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_LEUL, false,
-                       full_le_ldul_mmu);
+                       full_le_ldul_mmu, wrap_ldul_le);
 }
 
 tcg_target_ulong helper_le_ldul_mmu(CPUArchState *env, target_ulong addr,
@@ -1467,7 +1469,7 @@  static uint64_t full_be_ldul_mmu(CPUArchState *env, target_ulong addr,
                                  TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_BEUL, false,
-                       full_be_ldul_mmu);
+                       full_be_ldul_mmu, wrap_ldul_be);
 }
 
 tcg_target_ulong helper_be_ldul_mmu(CPUArchState *env, target_ulong addr,
@@ -1480,14 +1482,14 @@  uint64_t helper_le_ldq_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_LEQ, false,
-                       helper_le_ldq_mmu);
+                       helper_le_ldq_mmu, ldq_le_p);
 }
 
 uint64_t helper_be_ldq_mmu(CPUArchState *env, target_ulong addr,
                            TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_BEQ, false,
-                       helper_be_ldq_mmu);
+                       helper_be_ldq_mmu, ldq_be_p);
 }
 
 /*
@@ -1530,9 +1532,38 @@  tcg_target_ulong helper_be_ldsl_mmu(CPUArchState *env, target_ulong addr,
  * Store Helpers
  */
 
+typedef void StoreHelper(void *, uint64_t);
+
+/* Wrap the unaligned store helpers to that they have a common signature.  */
+static inline void wrap_stb(void *haddr, uint64_t val)
+{
+    stb_p(haddr, val);
+}
+
+static inline void wrap_stw_be(void *haddr, uint64_t val)
+{
+    stw_be_p(haddr, val);
+}
+
+static inline void wrap_stw_le(void *haddr, uint64_t val)
+{
+    stw_le_p(haddr, val);
+}
+
+static inline void wrap_stl_be(void *haddr, uint64_t val)
+{
+    stl_be_p(haddr, val);
+}
+
+static inline void wrap_stl_le(void *haddr, uint64_t val)
+{
+    stl_le_p(haddr, val);
+}
+
 static inline void QEMU_ALWAYS_INLINE
 store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
-             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op)
+             TCGMemOpIdx oi, uintptr_t retaddr, MemOp op,
+             StoreHelper *direct)
 {
     uintptr_t mmu_idx = get_mmuidx(oi);
     uintptr_t index = tlb_index(env, mmu_idx, addr);
@@ -1657,74 +1688,49 @@  store_helper(CPUArchState *env, target_ulong addr, uint64_t val,
 
  do_aligned_access:
     haddr = (void *)((uintptr_t)addr + entry->addend);
-    switch (op) {
-    case MO_UB:
-        stb_p(haddr, val);
-        break;
-    case MO_BEUW:
-        stw_be_p(haddr, val);
-        break;
-    case MO_LEUW:
-        stw_le_p(haddr, val);
-        break;
-    case MO_BEUL:
-        stl_be_p(haddr, val);
-        break;
-    case MO_LEUL:
-        stl_le_p(haddr, val);
-        break;
-    case MO_BEQ:
-        stq_be_p(haddr, val);
-        break;
-    case MO_LEQ:
-        stq_le_p(haddr, val);
-        break;
-    default:
-        g_assert_not_reached();
-        break;
-    }
+    direct(haddr, val);
 }
 
 void helper_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                         TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    store_helper(env, addr, val, oi, retaddr, MO_UB);
+    store_helper(env, addr, val, oi, retaddr, MO_UB, wrap_stb);
 }
 
 void helper_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    store_helper(env, addr, val, oi, retaddr, MO_LEUW);
+    store_helper(env, addr, val, oi, retaddr, MO_LEUW, wrap_stw_le);
 }
 
 void helper_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    store_helper(env, addr, val, oi, retaddr, MO_BEUW);
+    store_helper(env, addr, val, oi, retaddr, MO_BEUW, wrap_stw_be);
 }
 
 void helper_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    store_helper(env, addr, val, oi, retaddr, MO_LEUL);
+    store_helper(env, addr, val, oi, retaddr, MO_LEUL, wrap_stl_le);
 }
 
 void helper_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    store_helper(env, addr, val, oi, retaddr, MO_BEUL);
+    store_helper(env, addr, val, oi, retaddr, MO_BEUL, wrap_stl_be);
 }
 
 void helper_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    store_helper(env, addr, val, oi, retaddr, MO_LEQ);
+    store_helper(env, addr, val, oi, retaddr, MO_LEQ, stq_le_p);
 }
 
 void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
                        TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    store_helper(env, addr, val, oi, retaddr, MO_BEQ);
+    store_helper(env, addr, val, oi, retaddr, MO_BEQ, stq_be_p);
 }
 
 /* First set of helpers allows passing in of OI and RETADDR.  This makes
@@ -1789,7 +1795,8 @@  void helper_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val,
 static uint64_t full_ldub_cmmu(CPUArchState *env, target_ulong addr,
                                TCGMemOpIdx oi, uintptr_t retaddr)
 {
-    return load_helper(env, addr, oi, retaddr, MO_8, true, full_ldub_cmmu);
+    return load_helper(env, addr, oi, retaddr, MO_8, true,
+                       full_ldub_cmmu, wrap_ldub);
 }
 
 uint8_t helper_ret_ldb_cmmu(CPUArchState *env, target_ulong addr,
@@ -1802,7 +1809,7 @@  static uint64_t full_le_lduw_cmmu(CPUArchState *env, target_ulong addr,
                                   TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_LEUW, true,
-                       full_le_lduw_cmmu);
+                       full_le_lduw_cmmu, wrap_lduw_le);
 }
 
 uint16_t helper_le_ldw_cmmu(CPUArchState *env, target_ulong addr,
@@ -1815,7 +1822,7 @@  static uint64_t full_be_lduw_cmmu(CPUArchState *env, target_ulong addr,
                                   TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_BEUW, true,
-                       full_be_lduw_cmmu);
+                       full_be_lduw_cmmu, wrap_lduw_be);
 }
 
 uint16_t helper_be_ldw_cmmu(CPUArchState *env, target_ulong addr,
@@ -1828,7 +1835,7 @@  static uint64_t full_le_ldul_cmmu(CPUArchState *env, target_ulong addr,
                                   TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_LEUL, true,
-                       full_le_ldul_cmmu);
+                       full_le_ldul_cmmu, wrap_ldul_le);
 }
 
 uint32_t helper_le_ldl_cmmu(CPUArchState *env, target_ulong addr,
@@ -1841,7 +1848,7 @@  static uint64_t full_be_ldul_cmmu(CPUArchState *env, target_ulong addr,
                                   TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_BEUL, true,
-                       full_be_ldul_cmmu);
+                       full_be_ldul_cmmu, wrap_ldul_be);
 }
 
 uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
@@ -1854,12 +1861,12 @@  uint64_t helper_le_ldq_cmmu(CPUArchState *env, target_ulong addr,
                             TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_LEQ, true,
-                       helper_le_ldq_cmmu);
+                       helper_le_ldq_cmmu, ldq_le_p);
 }
 
 uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
                             TCGMemOpIdx oi, uintptr_t retaddr)
 {
     return load_helper(env, addr, oi, retaddr, MO_BEQ, true,
-                       helper_be_ldq_cmmu);
+                       helper_be_ldq_cmmu, ldq_be_p);
 }