[Xen-devel,v5,12/14] xen/arm: p2m: Clean cache PT when the IOMMU doesn't support coherent walk

Message ID 1399996230-18201-13-git-send-email-julien.grall@linaro.org
State Superseded, archived

Commit Message

Julien Grall May 13, 2014, 3:50 p.m. UTC
Some IOMMUs don't support coherent PT walk. When the p2m is shared with
the CPU, Xen has to make sure the PT changes have reached memory.

Introduce a new IOMMU callback that retrieves the IOMMU features for a
specified domain.

On ARM, a platform can contain multiple IOMMUs, and they may not all have
the same set of features. The domain parameter is used to get the set of
features of the IOMMUs used by this domain.

Signed-off-by: Julien Grall <julien.grall@linaro.org>
Cc: Jan Beulich <jbeulich@suse.com>
Cc: Xiantao Zhang <xiantao.zhang@intel.com>

---
    Changes in v5:
        - Flush on every write_pte instead of on page unmap. This
        avoids flushing a whole page when only a few bytes are modified
        - Only get the IOMMU features once.
        - Add code to flush the cache when a new table is created
        - Fix typos in commit message and comment
        - Use an enum to describe the features. Each item is a bit
        position

    Changes in v4:
        - Patch added
---
 xen/arch/arm/p2m.c              |   36 ++++++++++++++++++++++++++++--------
 xen/drivers/passthrough/iommu.c |   11 +++++++++++
 xen/include/xen/iommu.h         |    9 +++++++++
 3 files changed, 48 insertions(+), 8 deletions(-)

Comments

Jan Beulich May 14, 2014, 7:18 a.m. UTC | #1
> --- a/xen/drivers/passthrough/iommu.c
> +++ b/xen/drivers/passthrough/iommu.c
> @@ -344,6 +344,17 @@ void iommu_crash_shutdown(void)
>      iommu_enabled = iommu_intremap = 0;
>  }
>  
> +bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
> +{
> +    const struct iommu_ops *ops = domain_hvm_iommu(d)->platform_ops;
> +    uint32_t features = 0;

Please here and further down - don't use fixed width type unless
you really need to.

> --- a/xen/include/xen/iommu.h
> +++ b/xen/include/xen/iommu.h
> @@ -67,6 +67,14 @@ int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
>                     unsigned int flags);
>  int iommu_unmap_page(struct domain *d, unsigned long gfn);
>  
> +enum iommu_feature
> +{
> +    IOMMU_FEAT_COHERENT_WALK = 1,

Why 1? Enumerations are defined to start at zero, and starting at
zero is what you really want here. Don't specify a value at all.

> @@ -139,6 +147,7 @@ struct iommu_ops {
>      void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
>      void (*iotlb_flush_all)(struct domain *d);
>      void (*dump_p2m_table)(struct domain *d);
> +    uint32_t (*features)(struct domain *d);

I think I said this on an earlier round already - for future extensibility
this should return "const unsigned long *", and get accessed by the
wrapper function using test_bit(). Or even better without an accessor
function at all, just directly having a "const unsigned long *" field here.
Unless of course the backend implementation - which isn't part of this
patch - would have difficulty setting up a suitable bitfield during (early)
initialization.

Jan
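
For reference, a minimal sketch of the bitmap-based interface suggested above;
this is illustrative only and not part of the patch (only test_bit() and the
names already used in the patch below are real, the 'features' field is the
assumed addition):

    /* Sketch: the driver exposes a feature bitmap indexed by enum
     * iommu_feature; the generic wrapper just tests a bit in it. */
    struct iommu_ops {
        /* ... existing callbacks ... */
        const unsigned long *features;
    };

    bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
    {
        const struct iommu_ops *ops = domain_hvm_iommu(d)->platform_ops;

        if ( !iommu_enabled || !ops || !ops->features )
            return 0;

        return test_bit(feature, ops->features);
    }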
Julien Grall May 14, 2014, 9:09 a.m. UTC | #2
Hi Jan,

On 14/05/14 08:18, Jan Beulich wrote:
>> --- a/xen/drivers/passthrough/iommu.c
>> +++ b/xen/drivers/passthrough/iommu.c
>> @@ -344,6 +344,17 @@ void iommu_crash_shutdown(void)
>>       iommu_enabled = iommu_intremap = 0;
>>   }
>>
>> +bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
>> +{
>> +    const struct iommu_ops *ops = domain_hvm_iommu(d)->platform_ops;
>> +    uint32_t features = 0;
>
> Please here and further down - don't use fixed width type unless
> you really need to.
>
>> --- a/xen/include/xen/iommu.h
>> +++ b/xen/include/xen/iommu.h
>> @@ -67,6 +67,14 @@ int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
>>                      unsigned int flags);
>>   int iommu_unmap_page(struct domain *d, unsigned long gfn);
>>
>> +enum iommu_feature
>> +{
>> +    IOMMU_FEAT_COHERENT_WALK = 1,
>
> Why 1? Enumerations are defined to start at zero, and starting at
> zero is what you really want here. Don't specify a value at all.

It was a mistake when I created the enum. I will drop the 1.

>> @@ -139,6 +147,7 @@ struct iommu_ops {
>>       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
>>       void (*iotlb_flush_all)(struct domain *d);
>>       void (*dump_p2m_table)(struct domain *d);
>> +    uint32_t (*features)(struct domain *d);
>
> I think I said this on an earlier round already - for future extensibility
> this should return "const unsigned long *", and get accessed by the
> wrapper function using test_bit(). Or even better without an accessor
> function at all, just directly having a "const unsigned long *" field here.
> Unless of course the backend implementation - which isn't part of this
> patch - would have difficulty setting up a suitable bitfield during (early)
> initialization.

The SMMU drivers handle multiple SMMUs. Each SMMU can have different 
specifications (e.g. coherent walk support or not).

As each domain doesn't use all the SMMUs, we might be able to avoid flushing 
the PT on some of them. That's why I've chosen to use a callback with the 
domain as a parameter.

I don't like the solution which returns "unsigned long *" because we would be 
assuming the driver always provides a valid pointer (for instance to 2 
unsigned longs), even if it doesn't need it.

Regards,
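
For illustration, one hypothetical shape for the driver-side features()
callback matching the per-domain reasoning above; the arm_smmu_* structures,
the dom_smmu() accessor, the 'smmus' list and the ARM_SMMU_FEAT_COHERENT_WALK
flag are assumptions, not the real SMMU driver:

    /* Sketch: report coherent walk only if every SMMU used by the domain
     * supports it. */
    static uint32_t arm_smmu_iommu_features(struct domain *d)
    {
        struct arm_smmu_domain *smmu_dom = dom_smmu(d); /* hypothetical accessor */
        struct arm_smmu_device *smmu;
        uint32_t features = 1 << IOMMU_FEAT_COHERENT_WALK;

        list_for_each_entry( smmu, &smmu_dom->smmus, list )
            if ( !(smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) )
                features &= ~(1 << IOMMU_FEAT_COHERENT_WALK);

        return features;
    }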
Jan Beulich May 14, 2014, 9:25 a.m. UTC | #3
>>> On 14.05.14 at 11:09, <julien.grall@linaro.org> wrote:
>>> @@ -139,6 +147,7 @@ struct iommu_ops {
>>>       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
>>>       void (*iotlb_flush_all)(struct domain *d);
>>>       void (*dump_p2m_table)(struct domain *d);
>>> +    uint32_t (*features)(struct domain *d);
>>
>> I think I said this on an earlier round already - for future extensibility
>> this should return "const unsigned long *", and get accessed by the
>> wrapper function using test_bit(). Or even better without an accessor
>> function at all, just directly having a "const unsigned long *" field here.
>> Unless of course the backend implementation - which isn't part of this
>> patch - would have difficulty setting up a suitable bitfield during (early)
>> initialization.
> 
> The SMMU drivers handle multiple SMMUs. Each SMMU can have different 
> specifications (e.g coherent walk support or not).
> 
> As each domain doesn't use all SMMUs, we might be able to avoid flushing 
> PT on some of them. That's why I've choose to use a callback with the 
> domain in parameter.
> 
> I don't like the solution which return "unsigned long *" because we are 
> assuming the driver will always a valid pointer (for instance with 2 
> unsigned long), even if he doesn't need it.

In that case simply make this a bitmap embedded in struct iommu_ops,
which the driver has to populate suitably early. That way the individual
drivers don't need to care about the eventually growing size.

Jan
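
As a rough sketch of the embedded-bitmap variant described above
(IOMMU_FEAT_count is an assumed sentinel enumerator, and the init helper is
hypothetical):

    struct iommu_ops {
        /* ... existing callbacks ... */
        DECLARE_BITMAP(features, IOMMU_FEAT_count);
    };

    /* Each driver fills the bitmap during (early) initialisation: */
    static void __init arm_smmu_init_features(struct iommu_ops *ops,
                                              bool_t coherent_walk)
    {
        if ( coherent_walk )
            __set_bit(IOMMU_FEAT_COHERENT_WALK, ops->features);
    }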
Julien Grall May 14, 2014, 12:45 p.m. UTC | #4
On 05/14/2014 10:25 AM, Jan Beulich wrote:
>>>> On 14.05.14 at 11:09, <julien.grall@linaro.org> wrote:
>>>> @@ -139,6 +147,7 @@ struct iommu_ops {
>>>>       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
>>>>       void (*iotlb_flush_all)(struct domain *d);
>>>>       void (*dump_p2m_table)(struct domain *d);
>>>> +    uint32_t (*features)(struct domain *d);
>>>
>>> I think I said this on an earlier round already - for future extensibility
>>> this should return "const unsigned long *", and get accessed by the
>>> wrapper function using test_bit(). Or even better without an accessor
>>> function at all, just directly having a "const unsigned long *" field here.
>>> Unless of course the backend implementation - which isn't part of this
>>> patch - would have difficulty setting up a suitable bitfield during (early)
>>> initialization.
>>
>> The SMMU drivers handle multiple SMMUs. Each SMMU can have different 
>> specifications (e.g coherent walk support or not).
>>
>> As each domain doesn't use all SMMUs, we might be able to avoid flushing 
>> PT on some of them. That's why I've choose to use a callback with the 
>> domain in parameter.
>>
>> I don't like the solution which return "unsigned long *" because we are 
>> assuming the driver will always a valid pointer (for instance with 2 
>> unsigned long), even if he doesn't need it.
> 
> In that case simply make this a bitmap embedded in struct iommu_ops,
> which the driver has to populate suitably early. That way the individual
> drivers don't need to care about the eventually growing size.

That doesn't really help me, because the same platform can have a mix of
SMMUs supporting coherent walk or not.

For optimization it would be better to have a per-domain feature set.

What about creating a callback:
iommu_has_feature(struct domain *d, enum iommu_feature)

and letting the driver handle the feature?

Regards,
Jan Beulich May 14, 2014, 1:08 p.m. UTC | #5
>>> On 14.05.14 at 14:45, <julien.grall@linaro.org> wrote:
> On 05/14/2014 10:25 AM, Jan Beulich wrote:
>>>>> On 14.05.14 at 11:09, <julien.grall@linaro.org> wrote:
>>>>> @@ -139,6 +147,7 @@ struct iommu_ops {
>>>>>       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int 
> page_count);
>>>>>       void (*iotlb_flush_all)(struct domain *d);
>>>>>       void (*dump_p2m_table)(struct domain *d);
>>>>> +    uint32_t (*features)(struct domain *d);
>>>>
>>>> I think I said this on an earlier round already - for future extensibility
>>>> this should return "const unsigned long *", and get accessed by the
>>>> wrapper function using test_bit(). Or even better without an accessor
>>>> function at all, just directly having a "const unsigned long *" field here.
>>>> Unless of course the backend implementation - which isn't part of this
>>>> patch - would have difficulty setting up a suitable bitfield during (early)
>>>> initialization.
>>>
>>> The SMMU drivers handle multiple SMMUs. Each SMMU can have different 
>>> specifications (e.g coherent walk support or not).
>>>
>>> As each domain doesn't use all SMMUs, we might be able to avoid flushing 
>>> PT on some of them. That's why I've choose to use a callback with the 
>>> domain in parameter.
>>>
>>> I don't like the solution which return "unsigned long *" because we are 
>>> assuming the driver will always a valid pointer (for instance with 2 
>>> unsigned long), even if he doesn't need it.
>> 
>> In that case simply make this a bitmap embedded in struct iommu_ops,
>> which the driver has to populate suitably early. That way the individual
>> drivers don't need to care about the eventually growing size.
> 
> That doesn't really help me. Because a same platform can have a mix of
> SMMU support coherent walk or not.
> 
> For optimization it would be better to have a per-domain feature.

Oh, right, I didn't pay attention to you passing a domain into the
query.

> What about creating a callback:
> iommu_has_feature(struct domain *d, enum iommu_feature)
> 
> and let the driver handling the feature?

That sounds fine as long as you don't expect such to sit on any hot
path (indirect calls are expensive not only on x86 I think).

Jan
Julien Grall May 14, 2014, 1:11 p.m. UTC | #6
On 05/14/2014 02:08 PM, Jan Beulich wrote:
>>>> On 14.05.14 at 14:45, <julien.grall@linaro.org> wrote:
>> On 05/14/2014 10:25 AM, Jan Beulich wrote:
>>>>>> On 14.05.14 at 11:09, <julien.grall@linaro.org> wrote:
>>>>>> @@ -139,6 +147,7 @@ struct iommu_ops {
>>>>>>       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int 
>> page_count);
>>>>>>       void (*iotlb_flush_all)(struct domain *d);
>>>>>>       void (*dump_p2m_table)(struct domain *d);
>>>>>> +    uint32_t (*features)(struct domain *d);
>>>>>
>>>>> I think I said this on an earlier round already - for future extensibility
>>>>> this should return "const unsigned long *", and get accessed by the
>>>>> wrapper function using test_bit(). Or even better without an accessor
>>>>> function at all, just directly having a "const unsigned long *" field here.
>>>>> Unless of course the backend implementation - which isn't part of this
>>>>> patch - would have difficulty setting up a suitable bitfield during (early)
>>>>> initialization.
>>>>
>>>> The SMMU drivers handle multiple SMMUs. Each SMMU can have different 
>>>> specifications (e.g coherent walk support or not).
>>>>
>>>> As each domain doesn't use all SMMUs, we might be able to avoid flushing 
>>>> PT on some of them. That's why I've choose to use a callback with the 
>>>> domain in parameter.
>>>>
>>>> I don't like the solution which return "unsigned long *" because we are 
>>>> assuming the driver will always a valid pointer (for instance with 2 
>>>> unsigned long), even if he doesn't need it.
>>>
>>> In that case simply make this a bitmap embedded in struct iommu_ops,
>>> which the driver has to populate suitably early. That way the individual
>>> drivers don't need to care about the eventually growing size.
>>
>> That doesn't really help me. Because a same platform can have a mix of
>> SMMU support coherent walk or not.
>>
>> For optimization it would be better to have a per-domain feature.
> 
> Oh, right, I didn't pay attention to you passing a domain into the
> query.
> 
>> What about creating a callback:
>> iommu_has_feature(struct domain *d, enum iommu_feature)
>>
>> and let the driver handling the feature?
> 
> That sounds fine as long as you don't expect such to sit on any hot
> path (indirect calls are expensive not only on x86 I think).

Actually, it's called on a hot path (every time a page is mapped
in the p2m).

I've already optimized it a bit by only calling the function once per batch
of mappings (as with the IOMMU TLB flush).

Regards,
Jan Beulich May 14, 2014, 1:15 p.m. UTC | #7
>>> On 14.05.14 at 15:11, <julien.grall@linaro.org> wrote:
> On 05/14/2014 02:08 PM, Jan Beulich wrote:
>>>>> On 14.05.14 at 14:45, <julien.grall@linaro.org> wrote:
>>> On 05/14/2014 10:25 AM, Jan Beulich wrote:
>>>>>>> On 14.05.14 at 11:09, <julien.grall@linaro.org> wrote:
>>>>>>> @@ -139,6 +147,7 @@ struct iommu_ops {
>>>>>>>       void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int 
>>> page_count);
>>>>>>>       void (*iotlb_flush_all)(struct domain *d);
>>>>>>>       void (*dump_p2m_table)(struct domain *d);
>>>>>>> +    uint32_t (*features)(struct domain *d);
>>>>>>
>>>>>> I think I said this on an earlier round already - for future extensibility
>>>>>> this should return "const unsigned long *", and get accessed by the
>>>>>> wrapper function using test_bit(). Or even better without an accessor
>>>>>> function at all, just directly having a "const unsigned long *" field here.
>>>>>> Unless of course the backend implementation - which isn't part of this
>>>>>> patch - would have difficulty setting up a suitable bitfield during (early)
>>>>>> initialization.
>>>>>
>>>>> The SMMU drivers handle multiple SMMUs. Each SMMU can have different 
>>>>> specifications (e.g coherent walk support or not).
>>>>>
>>>>> As each domain doesn't use all SMMUs, we might be able to avoid flushing 
>>>>> PT on some of them. That's why I've choose to use a callback with the 
>>>>> domain in parameter.
>>>>>
>>>>> I don't like the solution which return "unsigned long *" because we are 
>>>>> assuming the driver will always a valid pointer (for instance with 2 
>>>>> unsigned long), even if he doesn't need it.
>>>>
>>>> In that case simply make this a bitmap embedded in struct iommu_ops,
>>>> which the driver has to populate suitably early. That way the individual
>>>> drivers don't need to care about the eventually growing size.
>>>
>>> That doesn't really help me. Because a same platform can have a mix of
>>> SMMU support coherent walk or not.
>>>
>>> For optimization it would be better to have a per-domain feature.
>> 
>> Oh, right, I didn't pay attention to you passing a domain into the
>> query.
>> 
>>> What about creating a callback:
>>> iommu_has_feature(struct domain *d, enum iommu_feature)
>>>
>>> and let the driver handling the feature?
>> 
>> That sounds fine as long as you don't expect such to sit on any hot
>> path (indirect calls are expensive not only on x86 I think).
> 
> Actually, it's called during an hot patch (every time a page is mapped
> in the p2m).

In that case, why don't you put the feature flag(s) somewhere in
struct domain or struct arch_domain (or some sub-structure thereof)?

Jan
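
A possible shape for that suggestion, as a sketch only; the 'features' field
in struct hvm_iommu and the point where it gets populated are assumptions:

    struct hvm_iommu {
        /* ... existing fields ... */
        unsigned long features;        /* bitmap of enum iommu_feature */
    };

    /* The wrapper then avoids the indirect call on the p2m hot path. */
    static inline bool_t iommu_has_feature(struct domain *d,
                                           enum iommu_feature feature)
    {
        return test_bit(feature, &domain_hvm_iommu(d)->features);
    }

    /* An IOMMU driver would set the bits once, e.g. when a device is
     * assigned to the domain:
     *     set_bit(IOMMU_FEAT_COHERENT_WALK, &domain_hvm_iommu(d)->features);
     */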
Julien Grall May 14, 2014, 1:56 p.m. UTC | #8
On 05/14/2014 02:15 PM, Jan Beulich wrote:
> In that case, why don't you put the feature flag(s) somewhere in
> struct domain or struct arch_domain (or some sub-structure thereof)?

I didn't think about this solution. I will give it a try.

Regards,

Patch

diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 816da21..4331866 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -253,9 +253,15 @@  static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
     return e;
 }
 
+static inline void p2m_write_pte(lpae_t *p, lpae_t pte, bool_t flush_cache)
+{
+    write_pte(p, pte);
+    if ( flush_cache )
+        clean_xen_dcache(*p);
+}
+
 /* Allocate a new page table page and hook it in via the given entry */
-static int p2m_create_table(struct domain *d,
-                            lpae_t *entry)
+static int p2m_create_table(struct domain *d, lpae_t *entry, bool_t flush_cache)
 {
     struct p2m_domain *p2m = &d->arch.p2m;
     struct page_info *page;
@@ -272,11 +278,13 @@  static int p2m_create_table(struct domain *d,
 
     p = __map_domain_page(page);
     clear_page(p);
+    if ( flush_cache )
+        clean_xen_dcache_va_range(p, PAGE_SIZE);
     unmap_domain_page(p);
 
     pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);
 
-    write_pte(entry, pte);
+    p2m_write_pte(entry, pte, flush_cache);
 
     return 0;
 }
@@ -308,6 +316,13 @@  static int apply_p2m_changes(struct domain *d,
     unsigned int flush = 0;
     bool_t populate = (op == INSERT || op == ALLOCATE);
     lpae_t pte;
+    bool_t flush_pt;
+
+    /* Some IOMMU don't support coherent PT walk. When the p2m is
+     * shared with the CPU, Xen has to make sure that the PT changes have
+     * reached the memory
+     */
+    flush_pt = need_iommu(d) && !iommu_has_feature(d, IOMMU_FEAT_COHERENT_WALK);
 
     spin_lock(&p2m->lock);
 
@@ -334,7 +349,8 @@  static int apply_p2m_changes(struct domain *d,
                 continue;
             }
 
-            rc = p2m_create_table(d, &first[first_table_offset(addr)]);
+            rc = p2m_create_table(d, &first[first_table_offset(addr)],
+                                  flush_pt);
             if ( rc < 0 )
             {
                 printk("p2m_populate_ram: L1 failed\n");
@@ -360,7 +376,8 @@  static int apply_p2m_changes(struct domain *d,
                 continue;
             }
 
-            rc = p2m_create_table(d, &second[second_table_offset(addr)]);
+            rc = p2m_create_table(d, &second[second_table_offset(addr)],
+                                  flush_pt);
             if ( rc < 0 ) {
                 printk("p2m_populate_ram: L2 failed\n");
                 goto out;
@@ -411,13 +428,15 @@  static int apply_p2m_changes(struct domain *d,
 
                     pte = mfn_to_p2m_entry(page_to_mfn(page), mattr, t);
 
-                    write_pte(&third[third_table_offset(addr)], pte);
+                    p2m_write_pte(&third[third_table_offset(addr)],
+                                  pte, flush_pt);
                 }
                 break;
             case INSERT:
                 {
                     pte = mfn_to_p2m_entry(maddr >> PAGE_SHIFT, mattr, t);
-                    write_pte(&third[third_table_offset(addr)], pte);
+                    p2m_write_pte(&third[third_table_offset(addr)],
+                                  pte, flush_pt);
                     maddr += PAGE_SIZE;
                 }
                 break;
@@ -433,7 +452,8 @@  static int apply_p2m_changes(struct domain *d,
                     count += 0x10;
 
                     memset(&pte, 0x00, sizeof(pte));
-                    write_pte(&third[third_table_offset(addr)], pte);
+                    p2m_write_pte(&third[third_table_offset(addr)],
+                                  pte, flush_pt);
                     count++;
                 }
                 break;
diff --git a/xen/drivers/passthrough/iommu.c b/xen/drivers/passthrough/iommu.c
index 59f1c3e..2faab48 100644
--- a/xen/drivers/passthrough/iommu.c
+++ b/xen/drivers/passthrough/iommu.c
@@ -344,6 +344,17 @@  void iommu_crash_shutdown(void)
     iommu_enabled = iommu_intremap = 0;
 }
 
+bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature)
+{
+    const struct iommu_ops *ops = domain_hvm_iommu(d)->platform_ops;
+    uint32_t features = 0;
+
+    if ( iommu_enabled && ops && ops->features )
+        features = ops->features(d);
+
+    return !!(features & (1 << feature));
+}
+
 static void iommu_dump_p2m_table(unsigned char key)
 {
     struct domain *d;
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index b7481dac..2a763ae 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -67,6 +67,14 @@  int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                    unsigned int flags);
 int iommu_unmap_page(struct domain *d, unsigned long gfn);
 
+enum iommu_feature
+{
+    IOMMU_FEAT_COHERENT_WALK = 1,
+};
+
+bool_t iommu_has_feature(struct domain *d, enum iommu_feature feature);
+
+
 #ifdef HAS_PCI
 void pt_pci_init(void);
 
@@ -139,6 +147,7 @@  struct iommu_ops {
     void (*iotlb_flush)(struct domain *d, unsigned long gfn, unsigned int page_count);
     void (*iotlb_flush_all)(struct domain *d);
     void (*dump_p2m_table)(struct domain *d);
+    uint32_t (*features)(struct domain *d);
 };
 
 void iommu_suspend(void);