diff mbox

[v5,08/10] xen/arm: Add relinquish_p2m_mapping to remove reference on every mapped page

Message ID 1387215452-10951-9-git-send-email-julien.grall@linaro.org
State Superseded, archived
Headers show

Commit Message

Julien Grall Dec. 16, 2013, 5:37 p.m. UTC
This function will be called when the domain relinquishes its memory.
It removes refcount on every mapped page to a valid MFN.

Currently, Xen doesn't take reference on every new mapping but only for foreign
mapping. Restrict the function only on foreign mapping.

Signed-off-by: Julien Grall <julien.grall@linaro.org>

---
    Changes in v4:
        - Use LPAE_ENTRIES instead of hardcoded value
    Changes in v3:
        - Rework title
        - Reuse create_p2m_entries to remove reference
        - Don't forget to set relmem!
        - Fix compilation (missing include)
    Changes in v2:
        - Introduce the patch
---
 xen/arch/arm/domain.c        |    8 ++++++++
 xen/arch/arm/p2m.c           |   44 +++++++++++++++++++++++++++++++++++++++++-
 xen/include/asm-arm/domain.h |    1 +
 xen/include/asm-arm/p2m.h    |   15 ++++++++++++++
 4 files changed, 67 insertions(+), 1 deletion(-)

Comments

Ian Campbell Dec. 17, 2013, 9:26 a.m. UTC | #1
On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
> +             hypercall_preempt_check() )
> +        {

If hypercall_preempt_check returns false the first time it is called
then you won't ever try again. I think you want
	(count+1 % LPAE_ENTRIES) == 0
(the +1 avoids preempting without having done any work yet)

(credit to Jan for pointing this out in his review of Mukesh's x86
variant)

Ian .
Jan Beulich Dec. 17, 2013, 10:03 a.m. UTC | #2
>>> On 17.12.13 at 10:26, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
>> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
>> +             hypercall_preempt_check() )
>> +        {
> 
> If hypercall_preempt_check returns false the first time it is called
> then you won't ever try again. I think you want
> 	(count+1 % LPAE_ENTRIES) == 0
> (the +1 avoids preempting without having done any work yet)

And hence needing proper parenthesization:

 	(count+1) % LPAE_ENTRIES == 0

Jan
Ian Campbell Dec. 17, 2013, 10:12 a.m. UTC | #3
On Tue, 2013-12-17 at 10:03 +0000, Jan Beulich wrote:
> >>> On 17.12.13 at 10:26, Ian Campbell <Ian.Campbell@citrix.com> wrote:
> > On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
> >> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
> >> +             hypercall_preempt_check() )
> >> +        {
> > 
> > If hypercall_preempt_check returns false the first time it is called
> > then you won't ever try again. I think you want
> > 	(count+1 % LPAE_ENTRIES) == 0
> > (the +1 avoids preempting without having done any work yet)
> 
> And hence needing proper parenthesization:
> 
>  	(count+1) % LPAE_ENTRIES == 0

Er, yes, oops!
Ian Campbell Dec. 17, 2013, 11:31 a.m. UTC | #4
On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index d05fdff..45179f7 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -6,6 +6,8 @@
>  #include <xen/bitops.h>
>  #include <asm/flushtlb.h>
>  #include <asm/gic.h>
> +#include <asm/event.h>
> +#include <asm/hardirq.h>
>  
>  /* First level P2M is 2 consecutive pages */
>  #define P2M_FIRST_ORDER 1
> @@ -213,7 +215,8 @@ static int p2m_create_table(struct domain *d,
>  enum p2m_operation {
>      INSERT,
>      ALLOCATE,
> -    REMOVE
> +    REMOVE,
> +    RELINQUISH,
>  };
>  
>  static int create_p2m_entries(struct domain *d,
> @@ -231,6 +234,7 @@ static int create_p2m_entries(struct domain *d,

If this function finds any non-present first or second level PTE then it
will stop and exit (goto out), meaning it will miss any mappings which
are higher up after the hole.

e.g. if you have a guest p2m with RAM at 0-2M and 4-6M then
relinquishing 0-6M will only actually free 0-2M, then abort on 4-6M.

Perhaps this could be fixed by making relinquish_p2m_mapping loop over
the address space relinquishing 2M chunks as it goes? This would remove
the need for the if ( op == RELINQUISH && .. && prempt() ) stuff,
because you could add the preempt in that loop.

BTW, Jan's approach in his x86 fix was to preempt every 2MB of present
or 32MB of non-present mappings, which makes sense because you skip
through the non-present ones more quickly. It'd be nice but lets not
hold up the series if it isn't trivial to achieve.

>      unsigned long cur_first_page = ~0,
>                    cur_first_offset = ~0,
>                    cur_second_offset = ~0;
> +    unsigned long count = 0;
>  
>      spin_lock(&p2m->lock);
>  
> @@ -315,6 +319,7 @@ static int create_p2m_entries(struct domain *d,
>                      maddr += PAGE_SIZE;
>                  }
>                  break;
> +            case RELINQUISH:
>              case REMOVE:
>                  {
>                      lpae_t pte = third[third_table_offset(addr)];
> @@ -338,6 +343,29 @@ static int create_p2m_entries(struct domain *d,
>  
>          if ( flush )
>              flush_tlb_all_local();
> +
> +
> +        count++;
> +
> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
> +             hypercall_preempt_check() )
> +        {
> +            p2m->next_gfn_to_relinquish = maddr >> PAGE_SHIFT;
> +            rc = -EAGAIN;
> +            goto out;
> +        }
> +    }
> +
> +    /* When the function will remove mapping, p2m type should always
> +     * be p2m_invalid. */
> +    if ( (t == p2m_ram_rw) || (t == p2m_ram_ro) || (t == p2m_map_foreign))

Is this a slightly odd way of saying either "t != p2m_invalid" or (op ==
CREATE || op == INSERT) ?

> +    {
> +        unsigned long sgfn = paddr_to_pfn(start_gpaddr);
> +        unsigned long egfn = paddr_to_pfn(end_gpaddr);
> +
> +        p2m->max_mapped_gfn = MAX(p2m->max_mapped_gfn, egfn);
> +        /* Use next_gfn_to_relinquish to store the lowest gfn mapped */
> +        p2m->next_gfn_to_relinquish = MIN(p2m->next_gfn_to_relinquish, sgfn);
>      }
>  
>      rc = 0;
> @@ -523,12 +551,26 @@ int p2m_init(struct domain *d)
>  
>      p2m->first_level = NULL;
>  
> +    p2m->max_mapped_gfn = 0;
> +    p2m->next_gfn_to_relinquish = ULONG_MAX;
> +
>  err:
>      spin_unlock(&p2m->lock);
>  
>      return rc;
>  }
>  
> +int relinquish_p2m_mapping(struct domain *d)
> +{
> +    struct p2m_domain *p2m = &d->arch.p2m;
> +
> +    return create_p2m_entries(d, RELINQUISH,
> +                              pfn_to_paddr(p2m->next_gfn_to_relinquish),
> +                              pfn_to_paddr(p2m->max_mapped_gfn),
> +                              pfn_to_paddr(INVALID_MFN),
> +                              MATTR_MEM, p2m_invalid);
> +}
> +
>  unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
>  {
>      paddr_t p = p2m_lookup(d, pfn_to_paddr(gpfn), NULL);
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 53c9895..28d39a0 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -112,6 +112,7 @@ struct arch_domain
>          RELMEM_not_started,
>          RELMEM_xen,
>          RELMEM_page,
> +        RELMEM_mapping,
>          RELMEM_done,
>      } relmem;
>  
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index 5ccfa7f..ac2b6fa 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -18,6 +18,15 @@ struct p2m_domain {
>  
>      /* Current VMID in use */
>      uint8_t vmid;
> +
> +    /* Highest guest frame that's ever been mapped in the p2m
> +     * Take only into account ram and foreign mapping

"Only takes into account ... mappings" or "Takes only ... mappings into
account".


> +     */
> +    unsigned long max_mapped_gfn;
> +
> +    /* When releasing mapped gfn's in a preemptible manner, recall where
> +     * to resume the search */
> +    unsigned long next_gfn_to_relinquish;
>  };
>  
>  /* List of possible type for each page in the p2m entry.
> @@ -48,6 +57,12 @@ int p2m_init(struct domain *d);
>  /* Return all the p2m resources to Xen. */
>  void p2m_teardown(struct domain *d);
>  
> +/* Remove mapping refcount on each mapping page in the p2m
> + *
> + * TODO: For the moment only foreign mapping is handled

"only foreign mappings are handled".

> + */
> +int relinquish_p2m_mapping(struct domain *d);
> +
>  /* Allocate a new p2m table for a domain.
>   *
>   * Returns 0 for success or -errno.
Julien Grall Dec. 17, 2013, 2:08 p.m. UTC | #5
On 12/17/2013 09:26 AM, Ian Campbell wrote:
> On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
>> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
>> +             hypercall_preempt_check() )
>> +        {
> 
> If hypercall_preempt_check returns false the first time it is called
> then you won't ever try again. I think you want
> 	(count+1 % LPAE_ENTRIES) == 0
> (the +1 avoids preempting without having done any work yet)
> 
> (credit to Jan for pointing this out in his review of Mukesh's x86
> variant)

The code was copied from relinquish_shared_pages
(arch/x86/mem_sharing.c), so if I'm not mistaken this code is also buggy.
Ian Campbell Dec. 17, 2013, 2:13 p.m. UTC | #6
On Tue, 2013-12-17 at 14:08 +0000, Julien Grall wrote:
> On 12/17/2013 09:26 AM, Ian Campbell wrote:
> > On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
> >> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
> >> +             hypercall_preempt_check() )
> >> +        {
> > 
> > If hypercall_preempt_check returns false the first time it is called
> > then you won't ever try again. I think you want
> > 	(count+1 % LPAE_ENTRIES) == 0
> > (the +1 avoids preempting without having done any work yet)
> > 
> > (credit to Jan for pointing this out in his review of Mukesh's x86
> > variant)
> 
> The code was copied from relinquish_shared_pages
> (arch/x86/mem_sharing.c), so if I'm not mistaken this code is also buggy.

Yes, Jan sent a fix for it this morning.

Ian.
Julien Grall Dec. 17, 2013, 2:40 p.m. UTC | #7
On 12/17/2013 09:26 AM, Ian Campbell wrote:
> On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
>> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
>> +             hypercall_preempt_check() )
>> +        {
> 
> If hypercall_preempt_check returns false the first time it is called
> then you won't ever try again. I think you want
> 	(count+1 % LPAE_ENTRIES) == 0
> (the +1 avoids preempting without having done any work yet)

I don't think we need +1 because count is increment each round just
before the check.

Is a solution as Jan made for x86 would be more appropriate?
Ian Campbell Dec. 17, 2013, 2:42 p.m. UTC | #8
On Tue, 2013-12-17 at 14:40 +0000, Julien Grall wrote:
> On 12/17/2013 09:26 AM, Ian Campbell wrote:
> > On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
> >> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
> >> +             hypercall_preempt_check() )
> >> +        {
> > 
> > If hypercall_preempt_check returns false the first time it is called
> > then you won't ever try again. I think you want
> > 	(count+1 % LPAE_ENTRIES) == 0
> > (the +1 avoids preempting without having done any work yet)
> 
> I don't think we need +1 because count is increment each round just
> before the check.

Ah yes, if the increment has already happened then good.

> Is a solution as Jan made for x86 would be more appropriate?

It's about equivalent I think? I'd hope the compiler would take a mod
2^N and turn it into a mask, but I suppose you never know...

Ian.
Julien Grall Dec. 17, 2013, 2:45 p.m. UTC | #9
On 12/17/2013 02:42 PM, Ian Campbell wrote:
> On Tue, 2013-12-17 at 14:40 +0000, Julien Grall wrote:
>> On 12/17/2013 09:26 AM, Ian Campbell wrote:
>>> On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
>>>> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
>>>> +             hypercall_preempt_check() )
>>>> +        {
>>>
>>> If hypercall_preempt_check returns false the first time it is called
>>> then you won't ever try again. I think you want
>>> 	(count+1 % LPAE_ENTRIES) == 0
>>> (the +1 avoids preempting without having done any work yet)
>>
>> I don't think we need +1 because count is increment each round just
>> before the check.
> 
> Ah yes, if the increment has already happened then good.
> 
>> Is a solution as Jan made for x86 would be more appropriate?
> 
> It's about equivalent I think? I'd hope the compiler would take a mod
> 2^N and turn it into a mask, but I suppose you never know...

I was talking about adding a greater increment when it's a foreign mapping.
Ian Campbell Dec. 17, 2013, 2:52 p.m. UTC | #10
On Tue, 2013-12-17 at 14:45 +0000, Julien Grall wrote:
> On 12/17/2013 02:42 PM, Ian Campbell wrote:
> > On Tue, 2013-12-17 at 14:40 +0000, Julien Grall wrote:
> >> On 12/17/2013 09:26 AM, Ian Campbell wrote:
> >>> On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
> >>>> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
> >>>> +             hypercall_preempt_check() )
> >>>> +        {
> >>>
> >>> If hypercall_preempt_check returns false the first time it is called
> >>> then you won't ever try again. I think you want
> >>> 	(count+1 % LPAE_ENTRIES) == 0
> >>> (the +1 avoids preempting without having done any work yet)
> >>
> >> I don't think we need +1 because count is increment each round just
> >> before the check.
> > 
> > Ah yes, if the increment has already happened then good.
> > 
> >> Is a solution as Jan made for x86 would be more appropriate?
> > 
> > It's about equivalent I think? I'd hope the compiler would take a mod
> > 2^N and turn it into a mask, but I suppose you never know...
> 
> I was talking about adding a greater increment when it's a foreign mapping.

Like I said in the other subthread:

        BTW, Jan's approach in his x86 fix was to preempt every 2MB of present
        or 32MB of non-present mappings, which makes sense because you skip
        through the non-present ones more quickly. It'd be nice but lets not
        hold up the series if it isn't trivial to achieve.
        
Ian
Julien Grall Dec. 17, 2013, 2:57 p.m. UTC | #11
On 12/17/2013 02:52 PM, Ian Campbell wrote:
> On Tue, 2013-12-17 at 14:45 +0000, Julien Grall wrote:
>> On 12/17/2013 02:42 PM, Ian Campbell wrote:
>>> On Tue, 2013-12-17 at 14:40 +0000, Julien Grall wrote:
>>>> On 12/17/2013 09:26 AM, Ian Campbell wrote:
>>>>> On Mon, 2013-12-16 at 17:37 +0000, Julien Grall wrote:
>>>>>> +        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
>>>>>> +             hypercall_preempt_check() )
>>>>>> +        {
>>>>>
>>>>> If hypercall_preempt_check returns false the first time it is called
>>>>> then you won't ever try again. I think you want
>>>>> 	(count+1 % LPAE_ENTRIES) == 0
>>>>> (the +1 avoids preempting without having done any work yet)
>>>>
>>>> I don't think we need +1 because count is increment each round just
>>>> before the check.
>>>
>>> Ah yes, if the increment has already happened then good.
>>>
>>>> Is a solution as Jan made for x86 would be more appropriate?
>>>
>>> It's about equivalent I think? I'd hope the compiler would take a mod
>>> 2^N and turn it into a mask, but I suppose you never know...
>>
>> I was talking about adding a greater increment when it's a foreign mapping.
> 
> Like I said in the other subthread:
> 
>         BTW, Jan's approach in his x86 fix was to preempt every 2MB of present
>         or 32MB of non-present mappings, which makes sense because you skip
>         through the non-present ones more quickly. It'd be nice but lets not
>         hold up the series if it isn't trivial to achieve.

Sorry I didn't yet read this mail. I will try to implement the same
behavior for the next version.
diff mbox

Patch

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 1590708..4099e88 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -717,6 +717,14 @@  int domain_relinquish_resources(struct domain *d)
         if ( ret )
             return ret;
 
+        d->arch.relmem = RELMEM_mapping;
+        /* Fallthrough */
+
+    case RELMEM_mapping:
+        ret = relinquish_p2m_mapping(d);
+        if ( ret )
+            return ret;
+
         d->arch.relmem = RELMEM_done;
         /* Fallthrough */
 
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index d05fdff..45179f7 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -6,6 +6,8 @@ 
 #include <xen/bitops.h>
 #include <asm/flushtlb.h>
 #include <asm/gic.h>
+#include <asm/event.h>
+#include <asm/hardirq.h>
 
 /* First level P2M is 2 consecutive pages */
 #define P2M_FIRST_ORDER 1
@@ -213,7 +215,8 @@  static int p2m_create_table(struct domain *d,
 enum p2m_operation {
     INSERT,
     ALLOCATE,
-    REMOVE
+    REMOVE,
+    RELINQUISH,
 };
 
 static int create_p2m_entries(struct domain *d,
@@ -231,6 +234,7 @@  static int create_p2m_entries(struct domain *d,
     unsigned long cur_first_page = ~0,
                   cur_first_offset = ~0,
                   cur_second_offset = ~0;
+    unsigned long count = 0;
 
     spin_lock(&p2m->lock);
 
@@ -315,6 +319,7 @@  static int create_p2m_entries(struct domain *d,
                     maddr += PAGE_SIZE;
                 }
                 break;
+            case RELINQUISH:
             case REMOVE:
                 {
                     lpae_t pte = third[third_table_offset(addr)];
@@ -338,6 +343,29 @@  static int create_p2m_entries(struct domain *d,
 
         if ( flush )
             flush_tlb_all_local();
+
+
+        count++;
+
+        if ( op == RELINQUISH && count == LPAE_ENTRIES &&
+             hypercall_preempt_check() )
+        {
+            p2m->next_gfn_to_relinquish = maddr >> PAGE_SHIFT;
+            rc = -EAGAIN;
+            goto out;
+        }
+    }
+
+    /* When the function will remove mapping, p2m type should always
+     * be p2m_invalid. */
+    if ( (t == p2m_ram_rw) || (t == p2m_ram_ro) || (t == p2m_map_foreign))
+    {
+        unsigned long sgfn = paddr_to_pfn(start_gpaddr);
+        unsigned long egfn = paddr_to_pfn(end_gpaddr);
+
+        p2m->max_mapped_gfn = MAX(p2m->max_mapped_gfn, egfn);
+        /* Use next_gfn_to_relinquish to store the lowest gfn mapped */
+        p2m->next_gfn_to_relinquish = MIN(p2m->next_gfn_to_relinquish, sgfn);
     }
 
     rc = 0;
@@ -523,12 +551,26 @@  int p2m_init(struct domain *d)
 
     p2m->first_level = NULL;
 
+    p2m->max_mapped_gfn = 0;
+    p2m->next_gfn_to_relinquish = ULONG_MAX;
+
 err:
     spin_unlock(&p2m->lock);
 
     return rc;
 }
 
+int relinquish_p2m_mapping(struct domain *d)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+
+    return create_p2m_entries(d, RELINQUISH,
+                              pfn_to_paddr(p2m->next_gfn_to_relinquish),
+                              pfn_to_paddr(p2m->max_mapped_gfn),
+                              pfn_to_paddr(INVALID_MFN),
+                              MATTR_MEM, p2m_invalid);
+}
+
 unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
 {
     paddr_t p = p2m_lookup(d, pfn_to_paddr(gpfn), NULL);
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 53c9895..28d39a0 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -112,6 +112,7 @@  struct arch_domain
         RELMEM_not_started,
         RELMEM_xen,
         RELMEM_page,
+        RELMEM_mapping,
         RELMEM_done,
     } relmem;
 
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index 5ccfa7f..ac2b6fa 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -18,6 +18,15 @@  struct p2m_domain {
 
     /* Current VMID in use */
     uint8_t vmid;
+
+    /* Highest guest frame that's ever been mapped in the p2m
+     * Take only into account ram and foreign mapping
+     */
+    unsigned long max_mapped_gfn;
+
+    /* When releasing mapped gfn's in a preemptible manner, recall where
+     * to resume the search */
+    unsigned long next_gfn_to_relinquish;
 };
 
 /* List of possible type for each page in the p2m entry.
@@ -48,6 +57,12 @@  int p2m_init(struct domain *d);
 /* Return all the p2m resources to Xen. */
 void p2m_teardown(struct domain *d);
 
+/* Remove mapping refcount on each mapping page in the p2m
+ *
+ * TODO: For the moment only foreign mapping is handled
+ */
+int relinquish_p2m_mapping(struct domain *d);
+
 /* Allocate a new p2m table for a domain.
  *
  * Returns 0 for success or -errno.