[17/27] exec.c: Handle IOMMUs in address_space_translate_for_iotlb()

Message ID 20180521140402.23318-18-peter.maydell@linaro.org
State Superseded
Headers show
Series
  • iommu: support txattrs, support TCG execution, implement TZ MPC
Related show

Commit Message

Peter Maydell May 21, 2018, 2:03 p.m.
Currently we don't support board configurations that put an IOMMU
in the path of the CPU's memory transactions, and instead just
assert() if the memory region found in address_space_translate_for_iotlb()
is an IOMMUMemoryRegion.

Remove this limitation by having the function handle IOMMUs.
This is mostly straightforward, but we must make sure we have
a notifier registered for every IOMMU that a transaction has
passed through, so that we can flush the TLB appropriately
when any of the IOMMUs change their mappings.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

---
 include/exec/exec-all.h |   3 +-
 include/qom/cpu.h       |   3 +
 accel/tcg/cputlb.c      |   3 +-
 exec.c                  | 147 +++++++++++++++++++++++++++++++++++++++-
 4 files changed, 152 insertions(+), 4 deletions(-)

-- 
2.17.0

Comments

Alex Bennée May 23, 2018, 9:51 a.m. | #1
Peter Maydell <peter.maydell@linaro.org> writes:

> Currently we don't support board configurations that put an IOMMU

> in the path of the CPU's memory transactions, and instead just

> assert() if the memory region fonud in address_space_translate_for_iotlb()

> is an IOMMUMemoryRegion.

>

> Remove this limitation by having the function handle IOMMUs.

> This is mostly straightforward, but we must make sure we have

> a notifier registered for every IOMMU that a transaction has

> passed through, so that we can flush the TLB appropriately

> when any of the IOMMUs change their mappings.

>

> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

> ---

>  include/exec/exec-all.h |   3 +-

>  include/qom/cpu.h       |   3 +

>  accel/tcg/cputlb.c      |   3 +-

>  exec.c                  | 147 +++++++++++++++++++++++++++++++++++++++-

>  4 files changed, 152 insertions(+), 4 deletions(-)

>

> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

> index 4d09eaba72..e0ff19b711 100644

> --- a/include/exec/exec-all.h

> +++ b/include/exec/exec-all.h

> @@ -469,7 +469,8 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);

>

>  MemoryRegionSection *

>  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

> -                                  hwaddr *xlat, hwaddr *plen);

> +                                  hwaddr *xlat, hwaddr *plen,

> +                                  MemTxAttrs attrs, int *prot);

>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,

>                                         MemoryRegionSection *section,

>                                         target_ulong vaddr,

> diff --git a/include/qom/cpu.h b/include/qom/cpu.h

> index 9d3afc6c75..d4a30149dd 100644

> --- a/include/qom/cpu.h

> +++ b/include/qom/cpu.h

> @@ -429,6 +429,9 @@ struct CPUState {

>      uint16_t pending_tlb_flush;

>

>      int hvf_fd;

> +

> +    /* track IOMMUs whose translations we've cached in the TCG TLB */

> +    GSList *iommu_notifiers;


So we are only concerned about TCG IOMMU notifiers here, specifically
TCGIOMMUNotifier structures. Why not just use a GArray and save
ourselves chasing pointers?

>  };

>

>  QTAILQ_HEAD(CPUTailQ, CPUState);

> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

> index 05439039e9..c8acaf21e9 100644

> --- a/accel/tcg/cputlb.c

> +++ b/accel/tcg/cputlb.c

> @@ -632,7 +632,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

>      }

>

>      sz = size;

> -    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz);

> +    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz,

> +                                                attrs, &prot);

>      assert(sz >= TARGET_PAGE_SIZE);

>

>      tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx

> diff --git a/exec.c b/exec.c

> index c9285c9c39..6c8f2dcc3f 100644

> --- a/exec.c

> +++ b/exec.c

> @@ -650,18 +650,158 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,

>      return mr;

>  }

>

> +typedef struct TCGIOMMUNotifier {

> +    IOMMUNotifier n;

> +    MemoryRegion *mr;

> +    CPUState *cpu;


This seems superfluous if we are storing the list of notifiers in the CPUState

> +    int iommu_idx;

> +    bool active;

> +} TCGIOMMUNotifier;

> +

> +static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)

> +{

> +    TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);

> +

> +    if (!notifier->active) {

> +        return;

> +    }

> +    tlb_flush(notifier->cpu);

> +    notifier->active = false;

> +    /* We leave the notifier struct on the list to avoid reallocating it later.

> +     * Generally the number of IOMMUs a CPU deals with will be small.

> +     * In any case we can't unregister the iommu notifier from a notify

> +     * callback.

> +     */

> +}

> +

> +static gint tcg_iommu_find_notifier(gconstpointer a, gconstpointer b)

> +{

> +    TCGIOMMUNotifier *notifier = (TCGIOMMUNotifier *)a;

> +    TCGIOMMUNotifier *seeking = (TCGIOMMUNotifier *)b;

> +

> +    if (notifier->mr == seeking->mr &&

> +        notifier->iommu_idx == seeking->iommu_idx) {

> +        return 0;

> +    }

> +    return 1;

> +}

> +

> +static void tcg_register_iommu_notifier(CPUState *cpu,

> +                                        IOMMUMemoryRegion *iommu_mr,

> +                                        int iommu_idx)

> +{

> +    /* Make sure this CPU has an IOMMU notifier registered for this

> +     * IOMMU/IOMMU index combination, so that we can flush its TLB

> +     * when the IOMMU tells us the mappings we've cached have changed.

> +     */

> +    TCGIOMMUNotifier seeking = {

> +        .mr = MEMORY_REGION(iommu_mr),

> +        .iommu_idx = iommu_idx,

> +    };

> +    TCGIOMMUNotifier *notifier;

> +    GSList *found = g_slist_find_custom(cpu->iommu_notifiers,

> +                                        &seeking,

> +                                        tcg_iommu_find_notifier);

> +    if (found) {

> +        notifier = found->data;

> +    } else {

> +        notifier = g_new0(TCGIOMMUNotifier, 1);

> +        notifier->mr = seeking.mr;

> +        notifier->iommu_idx = iommu_idx;

> +        notifier->cpu = cpu;

> +        /* Rather than trying to register interest in the specific part

> +         * of the iommu's address space that we've accessed and then

> +         * expand it later as subsequent accesses touch more of it, we

> +         * just register interest in the whole thing, on the assumption

> +         * that iommu reconfiguration will be rare.

> +         */

> +        iommu_notifier_init(&notifier->n,

> +                            tcg_iommu_unmap_notify,

> +                            IOMMU_NOTIFIER_UNMAP,

> +                            0,

> +                            HWADDR_MAX,

> +                            iommu_idx);

> +        memory_region_register_iommu_notifier(notifier->mr, &notifier->n);

> +        cpu->iommu_notifiers = g_slist_prepend(cpu->iommu_notifiers,

> +                                               notifier);

> +    }

> +    if (!notifier->active) {

> +        notifier->active = true;

> +    }

> +}

> +

> +static void tcg_iommu_notifier_destroy(gpointer data)

> +{

> +    TCGIOMMUNotifier *notifier = data;

> +

> +    if (notifier->active) {

> +        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);

> +    }

> +    g_free(notifier);

> +}

> +

> +static void tcg_iommu_free_notifier_list(CPUState *cpu)

> +{

> +    /* Destroy the CPU's notifier list */

> +    g_slist_free_full(cpu->iommu_notifiers, tcg_iommu_notifier_destroy);

> +    cpu->iommu_notifiers = NULL;

> +}

> +

>  /* Called from RCU critical section */

>  MemoryRegionSection *

>  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

> -                                  hwaddr *xlat, hwaddr *plen)

> +                                  hwaddr *xlat, hwaddr *plen,

> +                                  MemTxAttrs attrs, int *prot)

>  {

>      MemoryRegionSection *section;

> +    IOMMUMemoryRegion *iommu_mr;

> +    IOMMUMemoryRegionClass *imrc;

> +    IOMMUTLBEntry iotlb;

> +    int iommu_idx;

>      AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

>

> -    section = address_space_translate_internal(d, addr, xlat, plen, false);

> +    for (;;) {

> +        section = address_space_translate_internal(d, addr, &addr, plen, false);

> +

> +        iommu_mr = memory_region_get_iommu(section->mr);

> +        if (!iommu_mr) {

> +            break;

> +        }

> +

> +        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);

> +

> +        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);

> +        tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);

> +        /* We need all the permissions, so pass IOMMU_NONE so the IOMMU

> +         * doesn't short-cut its translation table walk.

> +         */

> +        iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);

> +        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)

> +                | (addr & iotlb.addr_mask));

> +        /* Update the caller's prot bits to remove permissions the IOMMU

> +         * is giving us a failure response for. If we get down to no

> +         * permissions left at all we can give up now.

> +         */

> +        if (!(iotlb.perm & IOMMU_RO)) {

> +            *prot &= ~(PAGE_READ | PAGE_EXEC);

> +        }

> +        if (!(iotlb.perm & IOMMU_WO)) {

> +            *prot &= ~PAGE_WRITE;

> +        }

> +

> +        if (!*prot) {

> +            goto translate_fail;

> +        }

> +

> +        d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));

> +    }

>

>      assert(!memory_region_is_iommu(section->mr));

> +    *xlat = addr;

>      return section;

> +

> +translate_fail:

> +    return &d->map.sections[PHYS_SECTION_UNASSIGNED];

>  }

>  #endif

>

> @@ -820,6 +960,9 @@ void cpu_exec_unrealizefn(CPUState *cpu)

>      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {

>          vmstate_unregister(NULL, &vmstate_cpu_common, cpu);

>      }

> +#ifndef CONFIG_USER_ONLY

> +    tcg_iommu_free_notifier_list(cpu);

> +#endif

>  }

>

>  Property cpu_common_props[] = {



--
Alex Bennée
Peter Maydell May 23, 2018, 11:52 a.m. | #2
On 23 May 2018 at 10:51, Alex Bennée <alex.bennee@linaro.org> wrote:
>

> Peter Maydell <peter.maydell@linaro.org> writes:

>

>> Currently we don't support board configurations that put an IOMMU

>> in the path of the CPU's memory transactions, and instead just

>> assert() if the memory region fonud in address_space_translate_for_iotlb()

>> is an IOMMUMemoryRegion.

>>

>> Remove this limitation by having the function handle IOMMUs.

>> This is mostly straightforward, but we must make sure we have

>> a notifier registered for every IOMMU that a transaction has

>> passed through, so that we can flush the TLB appropriately

>> when any of the IOMMUs change their mappings.

>>

>> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

>> ---

>>  include/exec/exec-all.h |   3 +-

>>  include/qom/cpu.h       |   3 +

>>  accel/tcg/cputlb.c      |   3 +-

>>  exec.c                  | 147 +++++++++++++++++++++++++++++++++++++++-

>>  4 files changed, 152 insertions(+), 4 deletions(-)

>>

>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

>> index 4d09eaba72..e0ff19b711 100644

>> --- a/include/exec/exec-all.h

>> +++ b/include/exec/exec-all.h

>> @@ -469,7 +469,8 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);

>>

>>  MemoryRegionSection *

>>  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

>> -                                  hwaddr *xlat, hwaddr *plen);

>> +                                  hwaddr *xlat, hwaddr *plen,

>> +                                  MemTxAttrs attrs, int *prot);

>>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,

>>                                         MemoryRegionSection *section,

>>                                         target_ulong vaddr,

>> diff --git a/include/qom/cpu.h b/include/qom/cpu.h

>> index 9d3afc6c75..d4a30149dd 100644

>> --- a/include/qom/cpu.h

>> +++ b/include/qom/cpu.h

>> @@ -429,6 +429,9 @@ struct CPUState {

>>      uint16_t pending_tlb_flush;

>>

>>      int hvf_fd;

>> +

>> +    /* track IOMMUs whose translations we've cached in the TCG TLB */

>> +    GSList *iommu_notifiers;

>

> So we are only concerned about TCG IOMMU notifiers here, specifically

> TCGIOMMUNotifier structures. Why not just use a GArray and save

> ourselves chasing pointers?


I don't have a strong opinion about which data structure to use;
but GSList has a "find an entry" API and GArray doesn't, so I
picked the one that had the API that meant I didn't need to
hand-code a search loop.

>> --- a/exec.c

>> +++ b/exec.c

>> @@ -650,18 +650,158 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,

>>      return mr;

>>  }

>>

>> +typedef struct TCGIOMMUNotifier {

>> +    IOMMUNotifier n;

>> +    MemoryRegion *mr;

>> +    CPUState *cpu;

>

> This seems superfluous if we are storing the list of notifiers in the CPUState


You need it because in the notifier callback all you get is a pointer
to the IOMMUNotifier, and we need to get from there to the CPUState*.

>> +    int iommu_idx;

>> +    bool active;

>> +} TCGIOMMUNotifier;


thanks
-- PMM
Eric Auger May 24, 2018, 7:54 p.m. | #3
Hi Peter,

On 05/23/2018 11:51 AM, Alex Bennée wrote:
> 

> Peter Maydell <peter.maydell@linaro.org> writes:

> 

>> Currently we don't support board configurations that put an IOMMU

>> in the path of the CPU's memory transactions, and instead just

>> assert() if the memory region fonud in address_space_translate_for_iotlb()

found
>> is an IOMMUMemoryRegion.

>>

>> Remove this limitation by having the function handle IOMMUs.

>> This is mostly straightforward, but we must make sure we have

>> a notifier registered for every IOMMU that a transaction has

>> passed through, so that we can flush the TLB appropriately

Can you elaborate on what (TCG) TLB we are talking about?

The concept of IOMMUs downstream to a CPU is not obvious to me. Maybe an
example may be documented in the commit message?
>> when any of the IOMMUs change their mappings.

>>

>> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>

>> ---

>>  include/exec/exec-all.h |   3 +-

>>  include/qom/cpu.h       |   3 +

>>  accel/tcg/cputlb.c      |   3 +-

>>  exec.c                  | 147 +++++++++++++++++++++++++++++++++++++++-

>>  4 files changed, 152 insertions(+), 4 deletions(-)

>>

>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

>> index 4d09eaba72..e0ff19b711 100644

>> --- a/include/exec/exec-all.h

>> +++ b/include/exec/exec-all.h

>> @@ -469,7 +469,8 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);

>>

>>  MemoryRegionSection *

>>  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

>> -                                  hwaddr *xlat, hwaddr *plen);

>> +                                  hwaddr *xlat, hwaddr *plen,

>> +                                  MemTxAttrs attrs, int *prot);

>>  hwaddr memory_region_section_get_iotlb(CPUState *cpu,

>>                                         MemoryRegionSection *section,

>>                                         target_ulong vaddr,

>> diff --git a/include/qom/cpu.h b/include/qom/cpu.h

>> index 9d3afc6c75..d4a30149dd 100644

>> --- a/include/qom/cpu.h

>> +++ b/include/qom/cpu.h

>> @@ -429,6 +429,9 @@ struct CPUState {

>>      uint16_t pending_tlb_flush;

>>

>>      int hvf_fd;

>> +

>> +    /* track IOMMUs whose translations we've cached in the TCG TLB */

>> +    GSList *iommu_notifiers;

> 

> So we are only concerned about TCG IOMMU notifiers here, specifically

> TCGIOMMUNotifier structures. Why not just use a GArray and save

> ourselves chasing pointers?

> 

>>  };

>>

>>  QTAILQ_HEAD(CPUTailQ, CPUState);

>> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c

>> index 05439039e9..c8acaf21e9 100644

>> --- a/accel/tcg/cputlb.c

>> +++ b/accel/tcg/cputlb.c

>> @@ -632,7 +632,8 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,

>>      }

>>

>>      sz = size;

>> -    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz);

>> +    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz,

>> +                                                attrs, &prot);

>>      assert(sz >= TARGET_PAGE_SIZE);

>>

>>      tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx

>> diff --git a/exec.c b/exec.c

>> index c9285c9c39..6c8f2dcc3f 100644

>> --- a/exec.c

>> +++ b/exec.c

>> @@ -650,18 +650,158 @@ MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,

>>      return mr;

>>  }

>>

>> +typedef struct TCGIOMMUNotifier {

>> +    IOMMUNotifier n;

>> +    MemoryRegion *mr;

>> +    CPUState *cpu;

> 

> This seems superfluous if we are storing the list of notifiers in the CPUState

> 

>> +    int iommu_idx;

>> +    bool active;

>> +} TCGIOMMUNotifier;

>> +

>> +static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)

>> +{

>> +    TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);

>> +

>> +    if (!notifier->active) {

>> +        return;

>> +    }

>> +    tlb_flush(notifier->cpu);

>> +    notifier->active = false;

>> +    /* We leave the notifier struct on the list to avoid reallocating it later.

>> +     * Generally the number of IOMMUs a CPU deals with will be small.

>> +     * In any case we can't unregister the iommu notifier from a notify

>> +     * callback.

>> +     */

I don't get the life cycle of the notifier and why it becomes inactive
after the invalidate. Could you detail the specificity of this one?
>> +}

>> +

>> +static gint tcg_iommu_find_notifier(gconstpointer a, gconstpointer b)

>> +{

>> +    TCGIOMMUNotifier *notifier = (TCGIOMMUNotifier *)a;

>> +    TCGIOMMUNotifier *seeking = (TCGIOMMUNotifier *)b;

>> +

>> +    if (notifier->mr == seeking->mr &&

>> +        notifier->iommu_idx == seeking->iommu_idx) {

>> +        return 0;

>> +    }

>> +    return 1;

>> +}

>> +

>> +static void tcg_register_iommu_notifier(CPUState *cpu,

>> +                                        IOMMUMemoryRegion *iommu_mr,

>> +                                        int iommu_idx)

>> +{

>> +    /* Make sure this CPU has an IOMMU notifier registered for this

>> +     * IOMMU/IOMMU index combination, so that we can flush its TLB

>> +     * when the IOMMU tells us the mappings we've cached have changed.

>> +     */

>> +    TCGIOMMUNotifier seeking = {

>> +        .mr = MEMORY_REGION(iommu_mr),

>> +        .iommu_idx = iommu_idx,

>> +    };

>> +    TCGIOMMUNotifier *notifier;

>> +    GSList *found = g_slist_find_custom(cpu->iommu_notifiers,

>> +                                        &seeking,

>> +                                        tcg_iommu_find_notifier);

>> +    if (found) {

>> +        notifier = found->data;

>> +    } else {

>> +        notifier = g_new0(TCGIOMMUNotifier, 1);

>> +        notifier->mr = seeking.mr;

>> +        notifier->iommu_idx = iommu_idx;

>> +        notifier->cpu = cpu;

>> +        /* Rather than trying to register interest in the specific part

>> +         * of the iommu's address space that we've accessed and then

>> +         * expand it later as subsequent accesses touch more of it, we

>> +         * just register interest in the whole thing, on the assumption

>> +         * that iommu reconfiguration will be rare.

>> +         */

>> +        iommu_notifier_init(&notifier->n,

>> +                            tcg_iommu_unmap_notify,

>> +                            IOMMU_NOTIFIER_UNMAP,

>> +                            0,

>> +                            HWADDR_MAX,

>> +                            iommu_idx);

>> +        memory_region_register_iommu_notifier(notifier->mr, &notifier->n);

>> +        cpu->iommu_notifiers = g_slist_prepend(cpu->iommu_notifiers,

>> +                                               notifier);

>> +    }

>> +    if (!notifier->active) {

>> +        notifier->active = true;

>> +    }

>> +}

>> +

>> +static void tcg_iommu_notifier_destroy(gpointer data)

>> +{

>> +    TCGIOMMUNotifier *notifier = data;

>> +

>> +    if (notifier->active) {

>> +        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);

>> +    }

Is it safe to leave the notifier registered to an IOMMU whereas it gets
freed?

Thanks

Eric
>> +    g_free(notifier);

>> +}

>> +

>> +static void tcg_iommu_free_notifier_list(CPUState *cpu)

>> +{

>> +    /* Destroy the CPU's notifier list */

>> +    g_slist_free_full(cpu->iommu_notifiers, tcg_iommu_notifier_destroy);

>> +    cpu->iommu_notifiers = NULL;

>> +}

>> +

>>  /* Called from RCU critical section */

>>  MemoryRegionSection *

>>  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

>> -                                  hwaddr *xlat, hwaddr *plen)

>> +                                  hwaddr *xlat, hwaddr *plen,

>> +                                  MemTxAttrs attrs, int *prot)

>>  {

>>      MemoryRegionSection *section;

>> +    IOMMUMemoryRegion *iommu_mr;

>> +    IOMMUMemoryRegionClass *imrc;

>> +    IOMMUTLBEntry iotlb;

>> +    int iommu_idx;

>>      AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

>>

>> -    section = address_space_translate_internal(d, addr, xlat, plen, false);

>> +    for (;;) {

>> +        section = address_space_translate_internal(d, addr, &addr, plen, false);

>> +

>> +        iommu_mr = memory_region_get_iommu(section->mr);

>> +        if (!iommu_mr) {

>> +            break;

>> +        }

>> +

>> +        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);

>> +

>> +        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);

>> +        tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);

>> +        /* We need all the permissions, so pass IOMMU_NONE so the IOMMU

>> +         * doesn't short-cut its translation table walk.

it is not clear to me why you don't use the access flag as you seem to
handle the perm fault after the translate() call.

Thanks

Eric
>> +         */

>> +        iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);

>> +        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)

>> +                | (addr & iotlb.addr_mask));

>> +        /* Update the caller's prot bits to remove permissions the IOMMU

>> +         * is giving us a failure response for. If we get down to no

>> +         * permissions left at all we can give up now.

>> +         */

>> +        if (!(iotlb.perm & IOMMU_RO)) {

>> +            *prot &= ~(PAGE_READ | PAGE_EXEC);

>> +        }

>> +        if (!(iotlb.perm & IOMMU_WO)) {

>> +            *prot &= ~PAGE_WRITE;

>> +        }

>> +

>> +        if (!*prot) {

>> +            goto translate_fail;

>> +        }

>> +

>> +        d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));

>> +    }

>>

>>      assert(!memory_region_is_iommu(section->mr));

>> +    *xlat = addr;

>>      return section;

>> +

>> +translate_fail:

>> +    return &d->map.sections[PHYS_SECTION_UNASSIGNED];

>>  }

>>  #endif

>>

>> @@ -820,6 +960,9 @@ void cpu_exec_unrealizefn(CPUState *cpu)

>>      if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {

>>          vmstate_unregister(NULL, &vmstate_cpu_common, cpu);

>>      }

>> +#ifndef CONFIG_USER_ONLY

>> +    tcg_iommu_free_notifier_list(cpu);

>> +#endif

>>  }

>>

>>  Property cpu_common_props[] = {

> 

> 

> --

> Alex Bennée

>
Peter Maydell May 25, 2018, 8:52 a.m. | #4
On 24 May 2018 at 20:54, Auger Eric <eric.auger@redhat.com> wrote:
> Hi Peter,

>

> On 05/23/2018 11:51 AM, Alex Bennée wrote:

>>

>> Peter Maydell <peter.maydell@linaro.org> writes:

>>

>>> Currently we don't support board configurations that put an IOMMU

>>> in the path of the CPU's memory transactions, and instead just

>>> assert() if the memory region fonud in address_space_translate_for_iotlb()

> found

>>> is an IOMMUMemoryRegion.

>>>

>>> Remove this limitation by having the function handle IOMMUs.

>>> This is mostly straightforward, but we must make sure we have

>>> a notifier registered for every IOMMU that a transaction has

>>> passed through, so that we can flush the TLB appropriately

> Can you elaborate on what (TCG) TLB we are talking about?


The TCG TLB, as implemented in accel/tcg/cputlb.c. Basically
the thing that caches the results it gets back from the memory
system so it can fast path device and memory accesses.

> The concept of IOMMUs downstream to a CPU is not obvious to me. Maybe an

> example may be documented in the commit message?


The MPC implemented in this patchset is an example.



>>> +static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)

>>> +{

>>> +    TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);

>>> +

>>> +    if (!notifier->active) {

>>> +        return;

>>> +    }

>>> +    tlb_flush(notifier->cpu);

>>> +    notifier->active = false;

>>> +    /* We leave the notifier struct on the list to avoid reallocating it later.

>>> +     * Generally the number of IOMMUs a CPU deals with will be small.

>>> +     * In any case we can't unregister the iommu notifier from a notify

>>> +     * callback.

>>> +     */

> I don't get the life cycle of the notifier and why it becomes inactive

> after the invalidate. Could you detail the specificity of this one?


Once we've flushed the TLB it is empty and will have no cached
information from the IOMMU. So there's no point in flushing the
TLB again (which is expensive) until the next time a transaction
goes through the IOMMU and we're caching something from it.

So the cycle goes:
 * CPU makes transaction that goes through an IOMMU
 * in tcg_register_iommu_notifier() we register the notifier
   if we haven't already, and make sure it's got active = true
 * in the unmap notify, we flush the whole TLB for the CPU, and
   set active = false
 * repeat...


>>> +static void tcg_iommu_notifier_destroy(gpointer data)

>>> +{

>>> +    TCGIOMMUNotifier *notifier = data;

>>> +

>>> +    if (notifier->active) {

>>> +        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);

>>> +    }

> Is it safe to leave the notifier registered to an IOMMU whereas it gets

> freed?


Oh, this is a bug, left over from my first idea (which was to
unregister the IOMMU notifier in the notifier unmap callback,
in which case active == true would be the only case when we
had a registered notifier).

We should unconditionally unregister the notifier here.


>>>  /* Called from RCU critical section */

>>>  MemoryRegionSection *

>>>  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

>>> -                                  hwaddr *xlat, hwaddr *plen)

>>> +                                  hwaddr *xlat, hwaddr *plen,

>>> +                                  MemTxAttrs attrs, int *prot)

>>>  {

>>>      MemoryRegionSection *section;

>>> +    IOMMUMemoryRegion *iommu_mr;

>>> +    IOMMUMemoryRegionClass *imrc;

>>> +    IOMMUTLBEntry iotlb;

>>> +    int iommu_idx;

>>>      AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

>>>

>>> -    section = address_space_translate_internal(d, addr, xlat, plen, false);

>>> +    for (;;) {

>>> +        section = address_space_translate_internal(d, addr, &addr, plen, false);

>>> +

>>> +        iommu_mr = memory_region_get_iommu(section->mr);

>>> +        if (!iommu_mr) {

>>> +            break;

>>> +        }

>>> +

>>> +        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);

>>> +

>>> +        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);

>>> +        tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);

>>> +        /* We need all the permissions, so pass IOMMU_NONE so the IOMMU

>>> +         * doesn't short-cut its translation table walk.

> it is not clear to me why you don't use the access flag as you seem to

> handle the perm fault after the translate() call.


We need to know all the permissions (because we'll cache the result
in the TCG TLB and later use them for future read and write accesses),
so we pass IOMMU_NONE.

My understanding from previous discussion is that the only
reason to pass in some other access flag value is if you
only care about one of read or write and want to allow the
IOMMU to stop walking the page table early as soon as it decides
it doesn't have permissions.

thanks
-- PMM
Eric Auger May 25, 2018, 9:50 a.m. | #5
Hi Peter,

On 05/25/2018 10:52 AM, Peter Maydell wrote:
> On 24 May 2018 at 20:54, Auger Eric <eric.auger@redhat.com> wrote:

>> Hi Peter,

>>

>> On 05/23/2018 11:51 AM, Alex Bennée wrote:

>>>

>>> Peter Maydell <peter.maydell@linaro.org> writes:

>>>

>>>> Currently we don't support board configurations that put an IOMMU

>>>> in the path of the CPU's memory transactions, and instead just

>>>> assert() if the memory region fonud in address_space_translate_for_iotlb()

>> found

>>>> is an IOMMUMemoryRegion.

>>>>

>>>> Remove this limitation by having the function handle IOMMUs.

>>>> This is mostly straightforward, but we must make sure we have

>>>> a notifier registered for every IOMMU that a transaction has

>>>> passed through, so that we can flush the TLB appropriately

>> Can you elaborate on what (TCG) TLB we are talking about?

> 

> The TCG TLB, as implemented in accel/tcg/cputlb.c. Basically

> the thing that caches the results it gets back from the memory

> system so it can fast path device and memory accesses.

> 

>> The concept of IOMMUs downstream to a CPU is not obvious to me. Maybe an

>> example may be documented in the commit message?

> 

> The MPC implemented in this patchset is an example.

> 

> 

> 

>>>> +static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)

>>>> +{

>>>> +    TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);

>>>> +

>>>> +    if (!notifier->active) {

>>>> +        return;

>>>> +    }

>>>> +    tlb_flush(notifier->cpu);

>>>> +    notifier->active = false;

>>>> +    /* We leave the notifier struct on the list to avoid reallocating it later.

>>>> +     * Generally the number of IOMMUs a CPU deals with will be small.

>>>> +     * In any case we can't unregister the iommu notifier from a notify

>>>> +     * callback.

>>>> +     */

>> I don't get the life cycle of the notifier and why it becomes inactive

>> after the invalidate. Could you detail the specificity of this one?

> 

> Once we've flushed the TLB it is empty and will have no cached

> information from the IOMMU. So there's no point in flushing the

> TLB again (which is expensive) until the next time a transaction

> goes through the IOMMU and we're caching something from it.

Ak OK. there is no finer granularity for TLB flush?

> 

> So the cycle goes:

>  * CPU makes transaction that goes through an IOMMU

>  * in tcg_register_iommu_notifier() we register the notifier

>    if we haven't already, and make sure it's got active = true

>  * in the unmap notify, we flush the whole TLB for the CPU, and

>    set active = false

>  * repeat...

OK thank you for the explanation
> 

> 

>>>> +static void tcg_iommu_notifier_destroy(gpointer data)

>>>> +{

>>>> +    TCGIOMMUNotifier *notifier = data;

>>>> +

>>>> +    if (notifier->active) {

>>>> +        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);

>>>> +    }

>> Is it safe to leave the notifier registered to an IOMMU whereas it gets

>> freed?

> 

> Oh, this is a bug, left over from my first idea (which was to

> unregister the IOMMU notifier in the notifier unmap callback,

> in which case active == true would be the only case when we

> had a registered notifier).

> 

> We should unconditionally unregister the notifier here.

> 

> 

>>>>  /* Called from RCU critical section */

>>>>  MemoryRegionSection *

>>>>  address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,

>>>> -                                  hwaddr *xlat, hwaddr *plen)

>>>> +                                  hwaddr *xlat, hwaddr *plen,

>>>> +                                  MemTxAttrs attrs, int *prot)

>>>>  {

>>>>      MemoryRegionSection *section;

>>>> +    IOMMUMemoryRegion *iommu_mr;

>>>> +    IOMMUMemoryRegionClass *imrc;

>>>> +    IOMMUTLBEntry iotlb;

>>>> +    int iommu_idx;

>>>>      AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);

>>>>

>>>> -    section = address_space_translate_internal(d, addr, xlat, plen, false);

>>>> +    for (;;) {

>>>> +        section = address_space_translate_internal(d, addr, &addr, plen, false);

>>>> +

>>>> +        iommu_mr = memory_region_get_iommu(section->mr);

>>>> +        if (!iommu_mr) {

>>>> +            break;

>>>> +        }

>>>> +

>>>> +        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);

>>>> +

>>>> +        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);

>>>> +        tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);

>>>> +        /* We need all the permissions, so pass IOMMU_NONE so the IOMMU

>>>> +         * doesn't short-cut its translation table walk.

>> it is not clear to me why you don't use the access flag as you seem to

>> handle the perm fault after the translate() call.

> 

> We need to know all the permissions (because we'll cache the result

> in the TCG TLB and later use them for future read and write accesses),

> so we pass IOMMU_NONE.

> 

> My understanding from previous discussion is that the only

> reason to pass in some other access flag value is if you

> only care about one of read or write and want to allow the

> IOMMU to stop walking the page table early as soon as it decides

> it doesn't have permissions.


Agreed. So you need to fetch the whole set of table permissions to
update the TLB. By the way, where is the TLB updated?

Thanks

Eric
> 

> thanks

> -- PMM

>
Peter Maydell May 25, 2018, 9:59 a.m. | #6
On 25 May 2018 at 10:50, Auger Eric <eric.auger@redhat.com> wrote:
> On 05/25/2018 10:52 AM, Peter Maydell wrote:

>> Once we've flushed the TLB it is empty and will have no cached

>> information from the IOMMU. So there's no point in flushing the

>> TLB again (which is expensive) until the next time a transaction

>> goes through the IOMMU and we're caching something from it.

> Ah OK. Is there no finer granularity for TLB flush?


Yes, there is -- you can flush by (input) address; but I
opted to do a global flush, because it doesn't require
complicated tracking of which parts of the IOMMU's address
range we care about, and in general I expect IOMMU config
changes to be fairly rare:

+        /* Rather than trying to register interest in the specific part
+         * of the iommu's address space that we've accessed and then
+         * expand it later as subsequent accesses touch more of it, we
+         * just register interest in the whole thing, on the assumption
+         * that iommu reconfiguration will be rare.
+         */

We can always come back and revisit that if there turns
out to be a performance problem here in practice.

>> We need to know all the permissions (because we'll cache the result

>> in the TCG TLB and later use them for future read and write accesses),

>> so we pass IOMMU_NONE.

>>

>> My understanding from previous discussion is that the only

>> reason to pass in some other access flag value is if you

>> only care about one of read or write and want to allow the

>> IOMMU to stop walking the page table early as soon as it decides

>> it doesn't have permissions.

>

> agreed. So you need to fetch the whole set of table permissions to

> update the TLB. By the way where is the TLB updated?


tlb_set_page_with_attrs() calls address_space_translate_for_iotlb(),
which looks up what's at that address, including doing IOMMU
translations. Then tlb_set_page_with_attrs() fills in the TLB
data structure with the results.

thanks
-- PMM

Patch

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 4d09eaba72..e0ff19b711 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -469,7 +469,8 @@  void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr);
 
 MemoryRegionSection *
 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
-                                  hwaddr *xlat, hwaddr *plen);
+                                  hwaddr *xlat, hwaddr *plen,
+                                  MemTxAttrs attrs, int *prot);
 hwaddr memory_region_section_get_iotlb(CPUState *cpu,
                                        MemoryRegionSection *section,
                                        target_ulong vaddr,
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 9d3afc6c75..d4a30149dd 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -429,6 +429,9 @@  struct CPUState {
     uint16_t pending_tlb_flush;
 
     int hvf_fd;
+
+    /* track IOMMUs whose translations we've cached in the TCG TLB */
+    GSList *iommu_notifiers;
 };
 
 QTAILQ_HEAD(CPUTailQ, CPUState);
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 05439039e9..c8acaf21e9 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -632,7 +632,8 @@  void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
     }
 
     sz = size;
-    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz);
+    section = address_space_translate_for_iotlb(cpu, asidx, paddr, &xlat, &sz,
+                                                attrs, &prot);
     assert(sz >= TARGET_PAGE_SIZE);
 
     tlb_debug("vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
diff --git a/exec.c b/exec.c
index c9285c9c39..6c8f2dcc3f 100644
--- a/exec.c
+++ b/exec.c
@@ -650,18 +650,158 @@  MemoryRegion *flatview_translate(FlatView *fv, hwaddr addr, hwaddr *xlat,
     return mr;
 }
 
+typedef struct TCGIOMMUNotifier {
+    IOMMUNotifier n;
+    MemoryRegion *mr;
+    CPUState *cpu;
+    int iommu_idx;
+    bool active;
+} TCGIOMMUNotifier;
+
+static void tcg_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
+{
+    TCGIOMMUNotifier *notifier = container_of(n, TCGIOMMUNotifier, n);
+
+    if (!notifier->active) {
+        return;
+    }
+    tlb_flush(notifier->cpu);
+    notifier->active = false;
+    /* We leave the notifier struct on the list to avoid reallocating it later.
+     * Generally the number of IOMMUs a CPU deals with will be small.
+     * In any case we can't unregister the iommu notifier from a notify
+     * callback.
+     */
+}
+
+static gint tcg_iommu_find_notifier(gconstpointer a, gconstpointer b)
+{
+    TCGIOMMUNotifier *notifier = (TCGIOMMUNotifier *)a;
+    TCGIOMMUNotifier *seeking = (TCGIOMMUNotifier *)b;
+
+    if (notifier->mr == seeking->mr &&
+        notifier->iommu_idx == seeking->iommu_idx) {
+        return 0;
+    }
+    return 1;
+}
+
+static void tcg_register_iommu_notifier(CPUState *cpu,
+                                        IOMMUMemoryRegion *iommu_mr,
+                                        int iommu_idx)
+{
+    /* Make sure this CPU has an IOMMU notifier registered for this
+     * IOMMU/IOMMU index combination, so that we can flush its TLB
+     * when the IOMMU tells us the mappings we've cached have changed.
+     */
+    TCGIOMMUNotifier seeking = {
+        .mr = MEMORY_REGION(iommu_mr),
+        .iommu_idx = iommu_idx,
+    };
+    TCGIOMMUNotifier *notifier;
+    GSList *found = g_slist_find_custom(cpu->iommu_notifiers,
+                                        &seeking,
+                                        tcg_iommu_find_notifier);
+    if (found) {
+        notifier = found->data;
+    } else {
+        notifier = g_new0(TCGIOMMUNotifier, 1);
+        notifier->mr = seeking.mr;
+        notifier->iommu_idx = iommu_idx;
+        notifier->cpu = cpu;
+        /* Rather than trying to register interest in the specific part
+         * of the iommu's address space that we've accessed and then
+         * expand it later as subsequent accesses touch more of it, we
+         * just register interest in the whole thing, on the assumption
+         * that iommu reconfiguration will be rare.
+         */
+        iommu_notifier_init(&notifier->n,
+                            tcg_iommu_unmap_notify,
+                            IOMMU_NOTIFIER_UNMAP,
+                            0,
+                            HWADDR_MAX,
+                            iommu_idx);
+        memory_region_register_iommu_notifier(notifier->mr, &notifier->n);
+        cpu->iommu_notifiers = g_slist_prepend(cpu->iommu_notifiers,
+                                               notifier);
+    }
+    if (!notifier->active) {
+        notifier->active = true;
+    }
+}
+
+static void tcg_iommu_notifier_destroy(gpointer data)
+{
+    TCGIOMMUNotifier *notifier = data;
+
+    if (notifier->active) {
+        memory_region_unregister_iommu_notifier(notifier->mr, &notifier->n);
+    }
+    g_free(notifier);
+}
+
+static void tcg_iommu_free_notifier_list(CPUState *cpu)
+{
+    /* Destroy the CPU's notifier list */
+    g_slist_free_full(cpu->iommu_notifiers, tcg_iommu_notifier_destroy);
+    cpu->iommu_notifiers = NULL;
+}
+
 /* Called from RCU critical section */
 MemoryRegionSection *
 address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
-                                  hwaddr *xlat, hwaddr *plen)
+                                  hwaddr *xlat, hwaddr *plen,
+                                  MemTxAttrs attrs, int *prot)
 {
     MemoryRegionSection *section;
+    IOMMUMemoryRegion *iommu_mr;
+    IOMMUMemoryRegionClass *imrc;
+    IOMMUTLBEntry iotlb;
+    int iommu_idx;
     AddressSpaceDispatch *d = atomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
 
-    section = address_space_translate_internal(d, addr, xlat, plen, false);
+    for (;;) {
+        section = address_space_translate_internal(d, addr, &addr, plen, false);
+
+        iommu_mr = memory_region_get_iommu(section->mr);
+        if (!iommu_mr) {
+            break;
+        }
+
+        imrc = memory_region_get_iommu_class_nocheck(iommu_mr);
+
+        iommu_idx = imrc->attrs_to_index(iommu_mr, attrs);
+        tcg_register_iommu_notifier(cpu, iommu_mr, iommu_idx);
+        /* We need all the permissions, so pass IOMMU_NONE so the IOMMU
+         * doesn't short-cut its translation table walk.
+         */
+        iotlb = imrc->translate(iommu_mr, addr, IOMMU_NONE, iommu_idx);
+        addr = ((iotlb.translated_addr & ~iotlb.addr_mask)
+                | (addr & iotlb.addr_mask));
+        /* Update the caller's prot bits to remove permissions the IOMMU
+         * is giving us a failure response for. If we get down to no
+         * permissions left at all we can give up now.
+         */
+        if (!(iotlb.perm & IOMMU_RO)) {
+            *prot &= ~(PAGE_READ | PAGE_EXEC);
+        }
+        if (!(iotlb.perm & IOMMU_WO)) {
+            *prot &= ~PAGE_WRITE;
+        }
+
+        if (!*prot) {
+            goto translate_fail;
+        }
+
+        d = flatview_to_dispatch(address_space_to_flatview(iotlb.target_as));
+    }
 
     assert(!memory_region_is_iommu(section->mr));
+    *xlat = addr;
     return section;
+
+translate_fail:
+    return &d->map.sections[PHYS_SECTION_UNASSIGNED];
 }
 #endif
 
@@ -820,6 +960,9 @@  void cpu_exec_unrealizefn(CPUState *cpu)
     if (qdev_get_vmsd(DEVICE(cpu)) == NULL) {
         vmstate_unregister(NULL, &vmstate_cpu_common, cpu);
     }
+#ifndef CONFIG_USER_ONLY
+    tcg_iommu_free_notifier_list(cpu);
+#endif
 }
 
 Property cpu_common_props[] = {