diff mbox series

[v3,04/18] accel/tcg: Add probe_access_flags

Message ID 20200422043309.18430-5-richard.henderson@linaro.org
State Superseded
Headers show
Series target/arm: sve load/store improvements | expand

Commit Message

Richard Henderson April 22, 2020, 4:32 a.m. UTC
This new interface will allow targets to probe for a page
and then handle watchpoints themselves.  This will be most
useful for vector predicated memory operations, where one
page lookup can be used for many operations, and one test
can avoid many watchpoint checks.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
v2: Fix return of host pointer in softmmu probe_access_flags.
---
 include/exec/cpu-all.h  |  13 ++-
 include/exec/exec-all.h |  22 +++++
 accel/tcg/cputlb.c      | 177 ++++++++++++++++++++--------------------
 accel/tcg/user-exec.c   |  36 +++++---
 4 files changed, 149 insertions(+), 99 deletions(-)

-- 
2.20.1

Comments

Peter Maydell April 27, 2020, 10:48 a.m. UTC | #1
On Wed, 22 Apr 2020 at 05:33, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> This new interface will allow targets to probe for a page

> and then handle watchpoints themselves.  This will be most

> useful for vector predicated memory operations, where one

> page lookup can be used for many operations, and one test

> can avoid many watchpoint checks.

>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

> v2: Fix return of host pointer in softmmu probe_access_flags.

> ---

>  include/exec/cpu-all.h  |  13 ++-

>  include/exec/exec-all.h |  22 +++++

>  accel/tcg/cputlb.c      | 177 ++++++++++++++++++++--------------------

>  accel/tcg/user-exec.c   |  36 +++++---

>  4 files changed, 149 insertions(+), 99 deletions(-)

>

> diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h

> index 49384bb66a..43ddcf024c 100644

> --- a/include/exec/cpu-all.h

> +++ b/include/exec/cpu-all.h

> @@ -328,7 +328,18 @@ CPUArchState *cpu_copy(CPUArchState *env);

>       | CPU_INTERRUPT_TGT_EXT_3   \

>       | CPU_INTERRUPT_TGT_EXT_4)

>

> -#if !defined(CONFIG_USER_ONLY)

> +#ifdef CONFIG_USER_ONLY

> +

> +/*

> + * Allow some level of source compatibility with softmmu.  We do not

> + * support any of the more exotic features, so only invalid pages may

> + * be signaled by probe_access_flags().

> + */

> +#define TLB_INVALID_MASK    (1 << (TARGET_PAGE_BITS_MIN - 1))

> +#define TLB_MMIO            0

> +#define TLB_WATCHPOINT      0

> +

> +#else

>

>  /*

>   * Flags stored in the low bits of the TLB virtual address.

> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

> index d656a1f05c..8792bea07a 100644

> --- a/include/exec/exec-all.h

> +++ b/include/exec/exec-all.h

> @@ -362,6 +362,28 @@ static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,

>      return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);

>  }

>

> +/**

> + * probe_access_flags:

> + * @env: CPUArchState

> + * @addr: guest virtual address to look up

> + * @access_type: read, write or execute permission

> + * @mmu_idx: MMU index to use for lookup

> + * @nonfault: suppress the fault

> + * @phost: return value for host address

> + * @retaddr: return address for unwinding

> + *

> + * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for

> + * the page, and storing the host address for RAM in @phost.

> + *

> + * If @nonfault is set, do not raise an exception but return TLB_INVALID_MASK.

> + * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned flags.

> + * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags.


"TLB_NOTDIRTY"

> + * For simplicity, all "mmio-like" flags are folded to TLB_MMIO.

> + */

> +int probe_access_flags(CPUArchState *env, target_ulong addr,

> +                       MMUAccessType access_type, int mmu_idx,

> +                       bool nonfault, void **phost, uintptr_t retaddr);

> +

>  #define CODE_GEN_ALIGN           16 /* must be >= of the size of a icache line */

>

>  /* Estimated block size for TB allocation.  */

> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

> index e3b5750c3b..bbe265ce28 100644

> --- a/accel/tcg/cputlb.c

> +++ b/accel/tcg/cputlb.c

> @@ -1231,86 +1231,16 @@ static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,

>      }

>  }

>

> -/*

> - * Probe for whether the specified guest access is permitted. If it is not

> - * permitted then an exception will be taken in the same way as if this

> - * were a real access (and we will not return).

> - * If the size is 0 or the page requires I/O access, returns NULL; otherwise,

> - * returns the address of the host page similar to tlb_vaddr_to_host().

> - */

> -void *probe_access(CPUArchState *env, target_ulong addr, int size,

> -                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)

> +static int probe_access_internal(CPUArchState *env, target_ulong addr,

> +                                 int fault_size, MMUAccessType access_type,

> +                                 int mmu_idx, bool nonfault,

> +                                 void **phost, uintptr_t retaddr)

>  {

>      uintptr_t index = tlb_index(env, mmu_idx, addr);

>      CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);

> -    target_ulong tlb_addr;

> -    size_t elt_ofs;

> -    int wp_access;

> -

> -    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

> -

> -    switch (access_type) {

> -    case MMU_DATA_LOAD:

> -        elt_ofs = offsetof(CPUTLBEntry, addr_read);

> -        wp_access = BP_MEM_READ;

> -        break;

> -    case MMU_DATA_STORE:

> -        elt_ofs = offsetof(CPUTLBEntry, addr_write);

> -        wp_access = BP_MEM_WRITE;

> -        break;

> -    case MMU_INST_FETCH:

> -        elt_ofs = offsetof(CPUTLBEntry, addr_code);

> -        wp_access = BP_MEM_READ;

> -        break;

> -    default:

> -        g_assert_not_reached();

> -    }

> -    tlb_addr = tlb_read_ofs(entry, elt_ofs);

> -

> -    if (unlikely(!tlb_hit(tlb_addr, addr))) {

> -        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,

> -                            addr & TARGET_PAGE_MASK)) {

> -            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);

> -            /* TLB resize via tlb_fill may have moved the entry. */

> -            index = tlb_index(env, mmu_idx, addr);

> -            entry = tlb_entry(env, mmu_idx, addr);

> -        }

> -        tlb_addr = tlb_read_ofs(entry, elt_ofs);

> -    }

> -

> -    if (!size) {

> -        return NULL;

> -    }

> -

> -    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {

> -        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

> -

> -        /* Reject I/O access, or other required slow-path.  */

> -        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {

> -            return NULL;

> -        }

> -

> -        /* Handle watchpoints.  */

> -        if (tlb_addr & TLB_WATCHPOINT) {

> -            cpu_check_watchpoint(env_cpu(env), addr, size,

> -                                 iotlbentry->attrs, wp_access, retaddr);

> -        }

> -

> -        /* Handle clean RAM pages.  */

> -        if (tlb_addr & TLB_NOTDIRTY) {

> -            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);

> -        }

> -    }

> -

> -    return (void *)((uintptr_t)addr + entry->addend);

> -}

> -

> -void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,

> -                        MMUAccessType access_type, int mmu_idx)

> -{

> -    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);

> -    target_ulong tlb_addr, page;

> +    target_ulong tlb_addr, page_addr;

>      size_t elt_ofs;

> +    int flags;

>

>      switch (access_type) {

>      case MMU_DATA_LOAD:

> @@ -1325,20 +1255,19 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,

>      default:

>          g_assert_not_reached();

>      }

> -

> -    page = addr & TARGET_PAGE_MASK;

>      tlb_addr = tlb_read_ofs(entry, elt_ofs);

>

> -    if (!tlb_hit_page(tlb_addr, page)) {

> -        uintptr_t index = tlb_index(env, mmu_idx, addr);

> -

> -        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {

> +    page_addr = addr & TARGET_PAGE_MASK;

> +    if (!tlb_hit_page(tlb_addr, page_addr)) {

> +        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {

>              CPUState *cs = env_cpu(env);

>              CPUClass *cc = CPU_GET_CLASS(cs);

>

> -            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {

> +            if (!cc->tlb_fill(cs, addr, fault_size, access_type,

> +                              mmu_idx, nonfault, retaddr)) {

>                  /* Non-faulting page table read failed.  */

> -                return NULL;

> +                *phost = NULL;

> +                return TLB_INVALID_MASK;

>              }

>

>              /* TLB resize via tlb_fill may have moved the entry.  */

> @@ -1346,15 +1275,89 @@ void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,

>          }

>          tlb_addr = tlb_read_ofs(entry, elt_ofs);

>      }

> +    flags = tlb_addr & TLB_FLAGS_MASK;

>

> -    if (tlb_addr & ~TARGET_PAGE_MASK) {

> -        /* IO access */

> +    /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */

> +    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {

> +        *phost = NULL;

> +        return TLB_MMIO;

> +    }

> +

> +    /* Everything else is RAM. */

> +    *phost = (void *)((uintptr_t)addr + entry->addend);

> +    return flags;

> +}

> +

> +int probe_access_flags(CPUArchState *env, target_ulong addr,

> +                       MMUAccessType access_type, int mmu_idx,

> +                       bool nonfault, void **phost, uintptr_t retaddr)

> +{

> +    int flags;

> +

> +    flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,

> +                                  nonfault, phost, retaddr);

> +

> +    /* Handle clean RAM pages.  */

> +    if (unlikely(flags & TLB_NOTDIRTY)) {

> +        uintptr_t index = tlb_index(env, mmu_idx, addr);

> +        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

> +

> +        notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);

> +        flags &= ~TLB_NOTDIRTY;

> +    }


probe_access() handles watchpoints. Why doesn't probe_access_flags()
have to do that?

> +

> +    return flags;

> +}

> +

> +void *probe_access(CPUArchState *env, target_ulong addr, int size,

> +                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)

> +{

> +    void *host;

> +    int flags;

> +

> +    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

> +

> +    flags = probe_access_internal(env, addr, size, access_type, mmu_idx,

> +                                  false, &host, retaddr);

> +

> +    /* Per the interface, size == 0 merely faults the access. */

> +    if (size == 0) {

>          return NULL;

>      }

>

> -    return (void *)((uintptr_t)addr + entry->addend);

> +    if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {

> +        uintptr_t index = tlb_index(env, mmu_idx, addr);

> +        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];

> +

> +        /* Handle clean RAM pages.  */

> +        if (flags & TLB_NOTDIRTY) {

> +            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);

> +        }

> +

> +        /* Handle watchpoints.  */

> +        if (flags & TLB_WATCHPOINT) {

> +            int wp_access = (access_type == MMU_DATA_STORE

> +                             ? BP_MEM_WRITE : BP_MEM_READ);

> +            cpu_check_watchpoint(env_cpu(env), addr, size,

> +                                 iotlbentry->attrs, wp_access, retaddr);

> +        }


The old code checked for watchpoints first, and then handled notdirty-writes,
which seems like the more correct order. Why has the new
version switched them around?

(Incidentally notdirty_write() is rather misleadingly named,
since it doesn't actually do a write, it just marks a notdirty
page as dirty.)

> +    }

> +

> +    return host;


The probe_access_internal() doc comment doesn't say that it
guarantees to set host to NULL for the TLB_MMIO/TLB_INVALID_MASK
cases, but we implicitly rely on it here.


>  }

>

> +void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,

> +                        MMUAccessType access_type, int mmu_idx)

> +{

> +    void *host;

> +    int flags;

> +

> +    flags = probe_access_internal(env, addr, 0, access_type,

> +                                  mmu_idx, true, &host, 0);

> +

> +    /* No combination of flags are expected by the caller. */

> +    return flags ? NULL : host;

> +}

>

>  #ifdef CONFIG_PLUGIN

>  /*

> diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c

> index 4be78eb9b3..91c2dc46dd 100644

> --- a/accel/tcg/user-exec.c

> +++ b/accel/tcg/user-exec.c

> @@ -190,13 +190,12 @@ static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,

>      g_assert_not_reached();

>  }

>

> -void *probe_access(CPUArchState *env, target_ulong addr, int size,

> -                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)

> +int probe_access_flags(CPUArchState *env, target_ulong addr,

> +                       MMUAccessType access_type, int mmu_idx,

> +                       bool nonfault, void **phost, uintptr_t ra)

>  {

>      int flags;

>

> -    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

> -

>      switch (access_type) {

>      case MMU_DATA_STORE:

>          flags = PAGE_WRITE;

> @@ -211,15 +210,30 @@ void *probe_access(CPUArchState *env, target_ulong addr, int size,

>          g_assert_not_reached();

>      }

>

> -    if (!guest_addr_valid(addr) || page_check_range(addr, size, flags) < 0) {

> -        CPUState *cpu = env_cpu(env);

> -        CPUClass *cc = CPU_GET_CLASS(cpu);

> -        cc->tlb_fill(cpu, addr, size, access_type, MMU_USER_IDX, false,

> -                     retaddr);

> -        g_assert_not_reached();

> +    if (!guest_addr_valid(addr) || page_check_range(addr, 1, flags) < 0) {

> +        if (nonfault) {

> +            *phost = NULL;

> +            return TLB_INVALID_MASK;

> +        } else {

> +            CPUState *cpu = env_cpu(env);

> +            CPUClass *cc = CPU_GET_CLASS(cpu);

> +            cc->tlb_fill(cpu, addr, 0, access_type, MMU_USER_IDX, false, ra);

> +            g_assert_not_reached();

> +        }

>      }

>

> -    return size ? g2h(addr) : NULL;

> +    *phost = g2h(addr);

> +    return 0;

> +}

> +

> +void *probe_access(CPUArchState *env, target_ulong addr, int size,

> +                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)

> +{

> +    void *host;

> +

> +    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

> +    probe_access_flags(env, addr, access_type, mmu_idx, false, &host, retaddr);

> +    return host;


The old code returned NULL for a zero size; the new version does not.
The old code passed size into cc->tlb_fill; the new version does not.
The old code passed size into page_check_range(); the new version does not.

thanks
-- PMM
Richard Henderson April 27, 2020, 4 p.m. UTC | #2
On 4/27/20 3:48 AM, Peter Maydell wrote:
> probe_access() handles watchpoints. Why doesn't probe_access_flags()

> have to do that?


Because we are explicitly deferring that work to the caller.  That's a good
fraction of the point of the new interface.

>> +        /* Handle clean RAM pages.  */

>> +        if (flags & TLB_NOTDIRTY) {

>> +            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);

>> +        }

>> +

>> +        /* Handle watchpoints.  */

>> +        if (flags & TLB_WATCHPOINT) {

>> +            int wp_access = (access_type == MMU_DATA_STORE

>> +                             ? BP_MEM_WRITE : BP_MEM_READ);

>> +            cpu_check_watchpoint(env_cpu(env), addr, size,

>> +                                 iotlbentry->attrs, wp_access, retaddr);

>> +        }

> 

> The old code checked for watchpoints first, and then handled notdirty-writes,

> which seems like the more correct order. Why has the new

> version switched them around?


Not an intentional change, but I shouldn't think it would matter in the end.

> The probe_access_internal() doc comment doesn't say that it

> guarantees to set host to NULL for the TLB_MMIO/TLB_INVALID_MASK

> cases, but we implicitly rely on it here.


Eh?  probe_access_internal doesn't have a doc comment.  Call that a bug if you
like, but you seem to be talking about something else.

>> +void *probe_access(CPUArchState *env, target_ulong addr, int size,

>> +                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)

>> +{

>> +    void *host;

>> +

>> +    g_assert(-(addr | TARGET_PAGE_MASK) >= size);

>> +    probe_access_flags(env, addr, access_type, mmu_idx, false, &host, retaddr);

>> +    return host;

> 

> The old code returned NULL for a zero size; the new version does not.


Granted.

> The old code passed size into cc->tlb_fill; the new version does not.

> The old code passed size into page_check_range(); the new version does not.


This is the user-only version, and size is not used for tlb_fill.  It is only
trivially used in page_change_range; we have just verified that addr+size does
not cross a page boundary.


r~
Peter Maydell May 4, 2020, 9:39 a.m. UTC | #3
On Mon, 27 Apr 2020 at 17:00, Richard Henderson
<richard.henderson@linaro.org> wrote:
>

> On 4/27/20 3:48 AM, Peter Maydell wrote:

> > The old code passed size into cc->tlb_fill; the new version does not.

> > The old code passed size into page_check_range(); the new version does not.

>

> This is the user-only version, and size is not used for tlb_fill.  It is only

> trivially used in page_change_range; we have just verified that addr+size does

> not cross a page boundary.


Yes, but:
 * they're APIs which take sizes
 * we have the size in hand so it's not difficult to pass it
 * in future maybe those functions will change to make more use
   of the size value, since they're being passed it
 * this change ought to be a refactoring that does the same thing
   the old code does

What's the reason for deliberately *not* passing the size and instead
using a constant 1 ?

thanks
-- PMM
diff mbox series

Patch

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 49384bb66a..43ddcf024c 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -328,7 +328,18 @@  CPUArchState *cpu_copy(CPUArchState *env);
      | CPU_INTERRUPT_TGT_EXT_3   \
      | CPU_INTERRUPT_TGT_EXT_4)
 
-#if !defined(CONFIG_USER_ONLY)
+#ifdef CONFIG_USER_ONLY
+
+/*
+ * Allow some level of source compatibility with softmmu.  We do not
+ * support any of the more exotic features, so only invalid pages may
+ * be signaled by probe_access_flags().
+ */
+#define TLB_INVALID_MASK    (1 << (TARGET_PAGE_BITS_MIN - 1))
+#define TLB_MMIO            0
+#define TLB_WATCHPOINT      0
+
+#else
 
 /*
  * Flags stored in the low bits of the TLB virtual address.
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d656a1f05c..8792bea07a 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -362,6 +362,28 @@  static inline void *probe_read(CPUArchState *env, target_ulong addr, int size,
     return probe_access(env, addr, size, MMU_DATA_LOAD, mmu_idx, retaddr);
 }
 
+/**
+ * probe_access_flags:
+ * @env: CPUArchState
+ * @addr: guest virtual address to look up
+ * @access_type: read, write or execute permission
+ * @mmu_idx: MMU index to use for lookup
+ * @nonfault: suppress the fault
+ * @phost: return value for host address
+ * @retaddr: return address for unwinding
+ *
+ * Similar to probe_access, loosely returning the TLB_FLAGS_MASK for
+ * the page, and storing the host address for RAM in @phost.
+ *
+ * If @nonfault is set, do not raise an exception but return TLB_INVALID_MASK.
+ * Do not handle watchpoints, but include TLB_WATCHPOINT in the returned flags.
+ * Do handle clean pages, so exclude TLB_NOTDIRY from the returned flags.
+ * For simplicity, all "mmio-like" flags are folded to TLB_MMIO.
+ */
+int probe_access_flags(CPUArchState *env, target_ulong addr,
+                       MMUAccessType access_type, int mmu_idx,
+                       bool nonfault, void **phost, uintptr_t retaddr);
+
 #define CODE_GEN_ALIGN           16 /* must be >= of the size of a icache line */
 
 /* Estimated block size for TB allocation.  */
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index e3b5750c3b..bbe265ce28 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1231,86 +1231,16 @@  static void notdirty_write(CPUState *cpu, vaddr mem_vaddr, unsigned size,
     }
 }
 
-/*
- * Probe for whether the specified guest access is permitted. If it is not
- * permitted then an exception will be taken in the same way as if this
- * were a real access (and we will not return).
- * If the size is 0 or the page requires I/O access, returns NULL; otherwise,
- * returns the address of the host page similar to tlb_vaddr_to_host().
- */
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
-                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
+static int probe_access_internal(CPUArchState *env, target_ulong addr,
+                                 int fault_size, MMUAccessType access_type,
+                                 int mmu_idx, bool nonfault,
+                                 void **phost, uintptr_t retaddr)
 {
     uintptr_t index = tlb_index(env, mmu_idx, addr);
     CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr;
-    size_t elt_ofs;
-    int wp_access;
-
-    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
-
-    switch (access_type) {
-    case MMU_DATA_LOAD:
-        elt_ofs = offsetof(CPUTLBEntry, addr_read);
-        wp_access = BP_MEM_READ;
-        break;
-    case MMU_DATA_STORE:
-        elt_ofs = offsetof(CPUTLBEntry, addr_write);
-        wp_access = BP_MEM_WRITE;
-        break;
-    case MMU_INST_FETCH:
-        elt_ofs = offsetof(CPUTLBEntry, addr_code);
-        wp_access = BP_MEM_READ;
-        break;
-    default:
-        g_assert_not_reached();
-    }
-    tlb_addr = tlb_read_ofs(entry, elt_ofs);
-
-    if (unlikely(!tlb_hit(tlb_addr, addr))) {
-        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs,
-                            addr & TARGET_PAGE_MASK)) {
-            tlb_fill(env_cpu(env), addr, size, access_type, mmu_idx, retaddr);
-            /* TLB resize via tlb_fill may have moved the entry. */
-            index = tlb_index(env, mmu_idx, addr);
-            entry = tlb_entry(env, mmu_idx, addr);
-        }
-        tlb_addr = tlb_read_ofs(entry, elt_ofs);
-    }
-
-    if (!size) {
-        return NULL;
-    }
-
-    if (unlikely(tlb_addr & TLB_FLAGS_MASK)) {
-        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
-
-        /* Reject I/O access, or other required slow-path.  */
-        if (tlb_addr & (TLB_MMIO | TLB_BSWAP | TLB_DISCARD_WRITE)) {
-            return NULL;
-        }
-
-        /* Handle watchpoints.  */
-        if (tlb_addr & TLB_WATCHPOINT) {
-            cpu_check_watchpoint(env_cpu(env), addr, size,
-                                 iotlbentry->attrs, wp_access, retaddr);
-        }
-
-        /* Handle clean RAM pages.  */
-        if (tlb_addr & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, size, iotlbentry, retaddr);
-        }
-    }
-
-    return (void *)((uintptr_t)addr + entry->addend);
-}
-
-void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
-                        MMUAccessType access_type, int mmu_idx)
-{
-    CPUTLBEntry *entry = tlb_entry(env, mmu_idx, addr);
-    target_ulong tlb_addr, page;
+    target_ulong tlb_addr, page_addr;
     size_t elt_ofs;
+    int flags;
 
     switch (access_type) {
     case MMU_DATA_LOAD:
@@ -1325,20 +1255,19 @@  void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
     default:
         g_assert_not_reached();
     }
-
-    page = addr & TARGET_PAGE_MASK;
     tlb_addr = tlb_read_ofs(entry, elt_ofs);
 
-    if (!tlb_hit_page(tlb_addr, page)) {
-        uintptr_t index = tlb_index(env, mmu_idx, addr);
-
-        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page)) {
+    page_addr = addr & TARGET_PAGE_MASK;
+    if (!tlb_hit_page(tlb_addr, page_addr)) {
+        if (!victim_tlb_hit(env, mmu_idx, index, elt_ofs, page_addr)) {
             CPUState *cs = env_cpu(env);
             CPUClass *cc = CPU_GET_CLASS(cs);
 
-            if (!cc->tlb_fill(cs, addr, 0, access_type, mmu_idx, true, 0)) {
+            if (!cc->tlb_fill(cs, addr, fault_size, access_type,
+                              mmu_idx, nonfault, retaddr)) {
                 /* Non-faulting page table read failed.  */
-                return NULL;
+                *phost = NULL;
+                return TLB_INVALID_MASK;
             }
 
             /* TLB resize via tlb_fill may have moved the entry.  */
@@ -1346,15 +1275,89 @@  void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
         }
         tlb_addr = tlb_read_ofs(entry, elt_ofs);
     }
+    flags = tlb_addr & TLB_FLAGS_MASK;
 
-    if (tlb_addr & ~TARGET_PAGE_MASK) {
-        /* IO access */
+    /* Fold all "mmio-like" bits into TLB_MMIO.  This is not RAM.  */
+    if (unlikely(flags & ~(TLB_WATCHPOINT | TLB_NOTDIRTY))) {
+        *phost = NULL;
+        return TLB_MMIO;
+    }
+
+    /* Everything else is RAM. */
+    *phost = (void *)((uintptr_t)addr + entry->addend);
+    return flags;
+}
+
+int probe_access_flags(CPUArchState *env, target_ulong addr,
+                       MMUAccessType access_type, int mmu_idx,
+                       bool nonfault, void **phost, uintptr_t retaddr)
+{
+    int flags;
+
+    flags = probe_access_internal(env, addr, 0, access_type, mmu_idx,
+                                  nonfault, phost, retaddr);
+
+    /* Handle clean RAM pages.  */
+    if (unlikely(flags & TLB_NOTDIRTY)) {
+        uintptr_t index = tlb_index(env, mmu_idx, addr);
+        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+
+        notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+        flags &= ~TLB_NOTDIRTY;
+    }
+
+    return flags;
+}
+
+void *probe_access(CPUArchState *env, target_ulong addr, int size,
+                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
+{
+    void *host;
+    int flags;
+
+    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
+
+    flags = probe_access_internal(env, addr, size, access_type, mmu_idx,
+                                  false, &host, retaddr);
+
+    /* Per the interface, size == 0 merely faults the access. */
+    if (size == 0) {
         return NULL;
     }
 
-    return (void *)((uintptr_t)addr + entry->addend);
+    if (unlikely(flags & (TLB_NOTDIRTY | TLB_WATCHPOINT))) {
+        uintptr_t index = tlb_index(env, mmu_idx, addr);
+        CPUIOTLBEntry *iotlbentry = &env_tlb(env)->d[mmu_idx].iotlb[index];
+
+        /* Handle clean RAM pages.  */
+        if (flags & TLB_NOTDIRTY) {
+            notdirty_write(env_cpu(env), addr, 1, iotlbentry, retaddr);
+        }
+
+        /* Handle watchpoints.  */
+        if (flags & TLB_WATCHPOINT) {
+            int wp_access = (access_type == MMU_DATA_STORE
+                             ? BP_MEM_WRITE : BP_MEM_READ);
+            cpu_check_watchpoint(env_cpu(env), addr, size,
+                                 iotlbentry->attrs, wp_access, retaddr);
+        }
+    }
+
+    return host;
 }
 
+void *tlb_vaddr_to_host(CPUArchState *env, abi_ptr addr,
+                        MMUAccessType access_type, int mmu_idx)
+{
+    void *host;
+    int flags;
+
+    flags = probe_access_internal(env, addr, 0, access_type,
+                                  mmu_idx, true, &host, 0);
+
+    /* No combination of flags are expected by the caller. */
+    return flags ? NULL : host;
+}
 
 #ifdef CONFIG_PLUGIN
 /*
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index 4be78eb9b3..91c2dc46dd 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -190,13 +190,12 @@  static inline int handle_cpu_signal(uintptr_t pc, siginfo_t *info,
     g_assert_not_reached();
 }
 
-void *probe_access(CPUArchState *env, target_ulong addr, int size,
-                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
+int probe_access_flags(CPUArchState *env, target_ulong addr,
+                       MMUAccessType access_type, int mmu_idx,
+                       bool nonfault, void **phost, uintptr_t ra)
 {
     int flags;
 
-    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
-
     switch (access_type) {
     case MMU_DATA_STORE:
         flags = PAGE_WRITE;
@@ -211,15 +210,30 @@  void *probe_access(CPUArchState *env, target_ulong addr, int size,
         g_assert_not_reached();
     }
 
-    if (!guest_addr_valid(addr) || page_check_range(addr, size, flags) < 0) {
-        CPUState *cpu = env_cpu(env);
-        CPUClass *cc = CPU_GET_CLASS(cpu);
-        cc->tlb_fill(cpu, addr, size, access_type, MMU_USER_IDX, false,
-                     retaddr);
-        g_assert_not_reached();
+    if (!guest_addr_valid(addr) || page_check_range(addr, 1, flags) < 0) {
+        if (nonfault) {
+            *phost = NULL;
+            return TLB_INVALID_MASK;
+        } else {
+            CPUState *cpu = env_cpu(env);
+            CPUClass *cc = CPU_GET_CLASS(cpu);
+            cc->tlb_fill(cpu, addr, 0, access_type, MMU_USER_IDX, false, ra);
+            g_assert_not_reached();
+        }
     }
 
-    return size ? g2h(addr) : NULL;
+    *phost = g2h(addr);
+    return 0;
+}
+
+void *probe_access(CPUArchState *env, target_ulong addr, int size,
+                   MMUAccessType access_type, int mmu_idx, uintptr_t retaddr)
+{
+    void *host;
+
+    g_assert(-(addr | TARGET_PAGE_MASK) >= size);
+    probe_access_flags(env, addr, access_type, mmu_idx, false, &host, retaddr);
+    return host;
 }
 
 #if defined(__i386__)