
[Xen-devel,RFC,v2,5/6] xen/arm: Implement hypercall for dirty page tracing

Message ID 1397595918-30419-13-git-send-email-w1.huang@samsung.com
State New

Commit Message

Wei Huang April 15, 2014, 9:05 p.m. UTC
From: Jaeyong Yoo <jaeyong.yoo@samsung.com>

This patch adds a hypercall for shadow operations, including enabling/disabling
dirty-page tracing and cleaning/peeking the dirty-page bitmap.

The design consists of two parts: dirty-page detection and saving. For
detection, we set the guest p2m's leaf PTEs read-only, so whenever the
guest tries to write something a permission fault is raised and traps
into Xen. The faulting GPA must be made available to the toolstack,
which checks which pages are dirty; for this purpose, Xen temporarily
records the GPAs in a bitmap.

Signed-off-by: Jaeyong Yoo <jaeyong.yoo@samsung.com>
Signed-off-by: Wei Huang <w1.huang@samsung.com>
---
 xen/arch/arm/domain.c           |   14 +++
 xen/arch/arm/domctl.c           |   21 ++++
 xen/arch/arm/mm.c               |  105 ++++++++++++++++++-
 xen/arch/arm/p2m.c              |  211 +++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/traps.c            |   11 ++
 xen/include/asm-arm/domain.h    |    7 ++
 xen/include/asm-arm/mm.h        |   10 ++
 xen/include/asm-arm/p2m.h       |    8 +-
 xen/include/asm-arm/processor.h |    2 +
 9 files changed, 387 insertions(+), 2 deletions(-)

Comments

Julien Grall April 15, 2014, 11:35 p.m. UTC | #1
Hi Wei,

Thank you for the patch.

On 15/04/14 22:05, Wei Huang wrote:
>   long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
>                       XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
>   {
> +    long ret = 0;
> +
>       switch ( domctl->cmd )
>       {
> +    case XEN_DOMCTL_shadow_op:
> +    {
> +        if ( d == current->domain ) /* no domain_pause() */

Error message here?

> +            return -EINVAL;
> +
> +        domain_pause(d);
> +        ret = dirty_mode_op(d, &domctl->u.shadow_op);
> +        domain_unpause(d);

Why do you pause/unpause the domain? Also you have to check that:
     - you have the right via XSM;
     - the domain is not dying...

See paging_domctl() in xen/arch/x86/mm/paging.c.
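
Roughly something like this (untested sketch, loosely modelled on the x86
paging_domctl(); the exact helpers and ordering would need checking):

    case XEN_DOMCTL_shadow_op:
    {
        if ( unlikely(d == current->domain) )   /* no domain_pause() on ourselves */
            return -EINVAL;

        if ( unlikely(d->is_dying) )            /* silently ignore dying domains */
            return 0;

        ret = xsm_shadow_control(XSM_HOOK, d, domctl->u.shadow_op.op);
        if ( ret )
            return ret;

        domain_pause(d);
        ret = dirty_mode_op(d, &domctl->u.shadow_op);
        domain_unpause(d);

        if ( __copy_to_guest(u_domctl, domctl, 1) )
            ret = -EFAULT;

        return ret;
    }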

[..]

> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index 403fd89..d57a44a 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -6,6 +6,8 @@
>   #include <xen/bitops.h>
>   #include <asm/flushtlb.h>
>   #include <asm/gic.h>
> +#include <xen/guest_access.h>
> +#include <xen/pfn.h>
>   #include <asm/event.h>
>   #include <asm/hardirq.h>
>   #include <asm/page.h>
> @@ -208,6 +210,7 @@ static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
>           break;
>
>       case p2m_ram_ro:
> +    case p2m_ram_logdirty:
>           e.p2m.xn = 0;
>           e.p2m.write = 0;
>           break;
> @@ -261,6 +264,10 @@ static int p2m_create_table(struct domain *d,
>
>       pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);
>
> +    /* mark the write bit (page table's case, ro bit) as 0. So it is writable

mark the entry as write-only?

> +     * in case of vlpt access */
> +    pte.pt.ro = 0;
> +
>       write_pte(entry, pte);
>
>       return 0;
> @@ -697,6 +704,210 @@ unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
>       return p >> PAGE_SHIFT;
>   }

> +/* Change types across all p2m entries in a domain */
> +void p2m_change_entry_type_global(struct domain *d, enum mg nt)
> +{

Can't you reuse apply_p2m_changes? I'm also concerned about preemption:
this function might take a very long time to run (depending on the size
of the guest's memory).

> +    struct p2m_domain *p2m = &d->arch.p2m;
> +    paddr_t ram_base;
> +    int i1, i2, i3;
> +    int first_index, second_index, third_index;
> +    lpae_t *first = __map_domain_page(p2m->first_level);
> +    lpae_t pte, *second = NULL, *third = NULL;
> +
> +    domain_get_gpfn_range(d, &ram_base, NULL);

Just be careful: these bounds cover all addresses mapped in the guest
(i.e. RAM, MMIO, foreign pages, grant tables...).

We don't want to log any type other than RAM.

Modifying the behavior of {max,lowest}_mapped_gfn won't work because the
guest might use several RAM banks with MMIO regions between them.

> +    first_index = first_table_offset((uint64_t)ram_base);
> +    second_index = second_table_offset((uint64_t)ram_base);
> +    third_index = third_table_offset((uint64_t)ram_base);
> +
> +    BUG_ON( !first && "Can't map first level p2m." );
> +

This BUG_ON is not necessary. __map_domain_page always returns a valid
pointer.

> +    spin_lock(&p2m->lock);
> +
> +    for ( i1 = first_index; i1 < LPAE_ENTRIES*2; ++i1 )
> +    {
> +        lpae_walk_t first_pte = first[i1].walk;
> +
> +        if ( !first_pte.valid || !first_pte.table )
> +            goto out;
> +
> +        second = map_domain_page(first_pte.base);
> +        BUG_ON( !second && "Can't map second level p2m.");
> +
> +        for ( i2 = second_index; i2 < LPAE_ENTRIES; ++i2 )
> +        {
> +            lpae_walk_t second_pte = second[i2].walk;
> +
> +            if ( !second_pte.valid || !second_pte.table )
> +                goto out;
> +
> +            third = map_domain_page(second_pte.base);
> +            BUG_ON( !third && "Can't map third level p2m.");
> +
> +            for ( i3 = third_index; i3 < LPAE_ENTRIES; ++i3 )
> +            {
> +
> +                lpae_walk_t third_pte = third[i3].walk;
> +                if ( !third_pte.valid )
> +                    goto out;
> +
> +                pte = third[i3];
> +                if ( nt == mg_ro )

I would use a switch statement on nt. It will be clearer and easier to extend (see the sketch further down).

> +                {
> +                    if ( pte.p2m.write == 1 )

As said earlier, this will trap everything that is writeable. We only
want to trap RAM.

I would replace this with a check such as if ( pte.p2m.type == p2m_ram_rw ).

> +                    {
> +                        pte.p2m.write = 0;
> +                        pte.p2m.type = p2m_ram_logdirty;
> +                    }
> +                    else
> +                    {
> +                        /* reuse avail bit as an indicator of 'actual'
> +                         * read-only */
> +                        pte.p2m.type = p2m_ram_rw;

Why do you unconditionally change the type?

> +                    }
> +                }
> +                else if ( nt == mg_rw )
> +                {
> +                    if ( pte.p2m.write == 0 &&
> +                         pte.p2m.type == p2m_ram_logdirty )

Can you add a comment to explain what this if means?

> +                    {
> +                        pte.p2m.write = p2m_ram_rw;

Wrong field?

> +                    }
> +                }
> +                write_pte(&third[i3], pte);
> +            }
> +            unmap_domain_page(third);
> +
> +            third = NULL;
> +            third_index = 0;
> +        }
> +        unmap_domain_page(second);
> +
> +        second = NULL;
> +        second_index = 0;
> +        third_index = 0;
> +    }
> +
> +out:
> +    flush_tlb_all_local();

You want to flush the P2M TLB entries on every CPU, but only for this
domain's VMID.

You should use flush_tlb(). You might need to switch to the domain's
VTTBR/VMID first.
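
To make both points concrete, the loop body could end up looking roughly like
this (untested sketch; it only handles RAM, drops the 'avail bit' trick above,
and still leaves the preemption concern open):

    pte = third[i3];

    switch ( nt )
    {
    case mg_ro:
        /* only put actual RAM into log-dirty mode */
        if ( pte.p2m.type == p2m_ram_rw )
        {
            pte.p2m.write = 0;
            pte.p2m.type = p2m_ram_logdirty;
        }
        break;

    case mg_rw:
        /* restore write access to pages made read-only for log-dirty */
        if ( pte.p2m.type == p2m_ram_logdirty )
        {
            pte.p2m.write = 1;
            pte.p2m.type = p2m_ram_rw;
        }
        break;

    default:
        break;
    }

    write_pte(&third[i3], pte);

The flush_tlb_all_local() at the out: label would then become flush_tlb()
(after switching to the guest's VTTBR if needed).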

[..]

> diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
> index a7edc4e..cca34e9 100644
> --- a/xen/arch/arm/traps.c
> +++ b/xen/arch/arm/traps.c
> @@ -1491,6 +1491,8 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
>       struct hsr_dabt dabt = hsr.dabt;
>       int rc;
>       mmio_info_t info;
> +    int page_fault = ( (dabt.dfsc & FSC_MASK) ==
> +                       (FSC_FLT_PERM | FSC_3D_LEVEL) && dabt.write );
>
>       if ( !check_conditional_instr(regs, hsr) )
>       {
> @@ -1512,6 +1514,15 @@ static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
>       if ( rc == -EFAULT )
>           goto bad_data_abort;
>
> +    /* domU page fault handling for guest live migration. dabt.valid can be

I would remove domU in this comment.

> +     * 0 here.
> +     */
> +    if ( page_fault && handle_page_fault(current->domain, info.gpa) )
> +    {
> +        /* Do not modify pc after page fault to repeat memory operation */
> +        return;
> +    }
> +
>       /* XXX: Decode the instruction if ISS is not valid */
>       if ( !dabt.valid )
>           goto bad_data_abort;

[..]

> diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
> index 5fd684f..5f9478b 100644
> --- a/xen/include/asm-arm/mm.h
> +++ b/xen/include/asm-arm/mm.h
> @@ -343,10 +343,18 @@ static inline void put_page_and_type(struct page_info *page)
>       put_page(page);
>   }
>
> +enum mg { mg_clear, mg_ro, mg_rw, mg_rx };

Please describe this enum. Also mg is too generic.

> +
> +/* routine for dirty-page tracing */
> +int handle_page_fault(struct domain *d, paddr_t addr);
> +
>   int prepare_vlpt(struct domain *d);
>   void cleanup_vlpt(struct domain *d);
>   void restore_vlpt(struct domain *d);
>
> +int prepare_bitmap(struct domain *d);
> +void cleanup_bitmap(struct domain *d);

{prepare,cleanup}_bitmap of what? The names are too generic here.
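
Something along these lines would already help (naming purely illustrative):

    /* Permissions applied to a range of mappings (was "enum mg") */
    enum mg {
        mg_clear,   /* remove the mapping */
        mg_ro,      /* read-only */
        mg_rw,      /* read-write */
        mg_rx,      /* read-execute */
    };

    /* dirty-page tracing: the "dirty" prefix makes the purpose explicit */
    int prepare_dirty_bitmap(struct domain *d);
    void cleanup_dirty_bitmap(struct domain *d);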

Regards.
Julien Grall April 23, 2014, 11:59 a.m. UTC | #2
Hi Wei,

On 04/15/2014 10:05 PM, Wei Huang wrote:
> diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
> index 3f04a77..d2531ed 100644
> --- a/xen/arch/arm/domain.c
> +++ b/xen/arch/arm/domain.c
> @@ -207,6 +207,12 @@ static void ctxt_switch_to(struct vcpu *n)
>  
>      isb();
>  
> +    /* Dirty-page tracing
> +     * NB: How do we consider SMP case?
> +     */
> +    if ( n->domain->arch.dirty.mode )
> +        restore_vlpt(n->domain);
> +

I thought a bit more about this piece of code.

Your VLPT implementation uses xen_second, which is shared between every
vCPU. Therefore restoring the VLPT here is pointless.

On the other hand, I didn't see anything which prevents you from migrating
2 domains at the same time. You will likely crash one (if not both) of the
guests.
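
A simple (illustrative, untested) way to at least fail cleanly would be to
record which domain currently owns the shared VLPT when log-dirty mode is
enabled:

    /* sketch only: serialise users of the shared VLPT */
    static struct domain *vlpt_owner;   /* protected by a suitable global lock */

    /* when enabling log-dirty in dirty_mode_op(): */
    if ( vlpt_owner && vlpt_owner != d )
        return -EBUSY;                  /* another domain is already being migrated */
    vlpt_owner = d;

    /* when handling XEN_DOMCTL_SHADOW_OP_OFF: */
    vlpt_owner = NULL;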

Regards,

Patch

diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 3f04a77..d2531ed 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -207,6 +207,12 @@  static void ctxt_switch_to(struct vcpu *n)
 
     isb();
 
+    /* Dirty-page tracing
+     * NB: How do we consider SMP case?
+     */
+    if ( n->domain->arch.dirty.mode )
+        restore_vlpt(n->domain);
+
     /* This is could trigger an hardware interrupt from the virtual
      * timer. The interrupt needs to be injected into the guest. */
     virt_timer_restore(n);
@@ -502,11 +508,19 @@  int arch_domain_create(struct domain *d, unsigned int domcr_flags)
     /* Default the virtual ID to match the physical */
     d->arch.vpidr = boot_cpu_data.midr.bits;
 
+    /* init for dirty-page tracing */
+    d->arch.dirty.count = 0;
+    d->arch.dirty.mode = 0;
+    spin_lock_init(&d->arch.dirty.lock);
+
     d->arch.dirty.second_lvl_start = 0;
     d->arch.dirty.second_lvl_end = 0;
     d->arch.dirty.second_lvl[0] = NULL;
     d->arch.dirty.second_lvl[1] = NULL;
 
+    memset(d->arch.dirty.bitmap, 0, sizeof(d->arch.dirty.bitmap));
+    d->arch.dirty.bitmap_pages = 0;
+
     clear_page(d->shared_info);
     share_xen_page_with_guest(
         virt_to_page(d->shared_info), d, XENSHARE_writable);
diff --git a/xen/arch/arm/domctl.c b/xen/arch/arm/domctl.c
index 45974e7..e84651f 100644
--- a/xen/arch/arm/domctl.c
+++ b/xen/arch/arm/domctl.c
@@ -11,12 +11,33 @@ 
 #include <xen/sched.h>
 #include <xen/hypercall.h>
 #include <public/domctl.h>
+#include <xen/hvm/save.h>
+#include <xen/guest_access.h>
+
 
 long arch_do_domctl(struct xen_domctl *domctl, struct domain *d,
                     XEN_GUEST_HANDLE_PARAM(xen_domctl_t) u_domctl)
 {
+    long ret = 0;
+
     switch ( domctl->cmd )
     {
+    case XEN_DOMCTL_shadow_op:
+    {
+        if ( d == current->domain ) /* no domain_pause() */
+            return -EINVAL;
+
+        domain_pause(d);
+        ret = dirty_mode_op(d, &domctl->u.shadow_op);
+        domain_unpause(d);
+
+        if ( __copy_to_guest(u_domctl, domctl, 1) )
+            ret = -EFAULT;
+
+        return ret;
+    }
+    break;
+
     case XEN_DOMCTL_cacheflush:
     {
         unsigned long s = domctl->u.cacheflush.start_pfn;
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index a315752..ae852eb 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -981,7 +981,6 @@  void destroy_xen_mappings(unsigned long v, unsigned long e)
     create_xen_entries(REMOVE, v, 0, (e - v) >> PAGE_SHIFT, 0);
 }
 
-enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
 static void set_pte_flags_on_range(const char *p, unsigned long l, enum mg mg)
 {
     lpae_t pte;
@@ -1370,6 +1369,110 @@  void domain_get_gpfn_range(struct domain *d, paddr_t *start, paddr_t *end)
         *end = GUEST_RAM_BASE + ((paddr_t) p2m->max_mapped_gfn);
 }
 
+static inline void mark_dirty_bitmap(struct domain *d, paddr_t addr)
+{
+    paddr_t ram_base = (paddr_t) GUEST_RAM_BASE;
+    int bit_index = PFN_DOWN(addr - ram_base);
+    int page_index = bit_index >> (PAGE_SHIFT + 3);
+    int bit_index_residual = bit_index & ((1ul << (PAGE_SHIFT + 3)) - 1);
+
+    set_bit(bit_index_residual, d->arch.dirty.bitmap[page_index]);
+}
+
+/* Routine for dirty-page tracing
+ *
+ * On first write, it page faults, its entry is changed to read-write,
+ * and on retry the write succeeds. For locating p2m of the faulting entry,
+ * we use virtual-linear page table.
+ *
+ * Returns zero if addr is not valid or dirty mode is not set
+ */
+int handle_page_fault(struct domain *d, paddr_t addr)
+{
+
+    lpae_t *vlp2m_pte = 0;
+    paddr_t gma_start = 0;
+    paddr_t gma_end = 0;
+
+    if ( !d->arch.dirty.mode )
+        return 0;
+
+    domain_get_gpfn_range(d, &gma_start, &gma_end);
+    /* Ensure that addr is inside guest's RAM */
+    if ( addr < gma_start || addr > gma_end )
+        return 0;
+
+    vlp2m_pte = get_vlpt_3lvl_pte(addr);
+    if ( vlp2m_pte->p2m.valid && vlp2m_pte->p2m.write == 0 &&
+         vlp2m_pte->p2m.type == p2m_ram_logdirty )
+    {
+        lpae_t pte = *vlp2m_pte;
+        pte.p2m.write = 1;
+        write_pte(vlp2m_pte, pte);
+        flush_tlb_local();
+
+        /* only necessary to lock between get-dirty bitmap and mark dirty
+         * bitmap. If get-dirty bitmap happens immediately before this
+         * lock, the corresponding dirty-page would be marked at the next
+         * round of get-dirty bitmap */
+        spin_lock(&d->arch.dirty.lock);
+        mark_dirty_bitmap(d, addr);
+        spin_unlock(&d->arch.dirty.lock);
+    }
+
+    return 1;
+}
+
+int prepare_bitmap(struct domain *d)
+{
+    paddr_t gma_start = 0;
+    paddr_t gma_end = 0;
+    int nr_bytes;
+    int nr_pages;
+    int i;
+
+    domain_get_gpfn_range(d, &gma_start, &gma_end);
+
+    nr_bytes = (PFN_DOWN(gma_end - gma_start) + 7) / 8;
+    nr_pages = (nr_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
+
+    BUG_ON( nr_pages > MAX_DIRTY_BITMAP_PAGES );
+
+    for ( i = 0; i < nr_pages; ++i )
+    {
+        struct page_info *page;
+
+        page = alloc_domheap_page(NULL, 0);
+        if ( page == NULL )
+            goto cleanup_on_failure;
+
+        d->arch.dirty.bitmap[i] = map_domain_page_global(__page_to_mfn(page));
+        clear_page(d->arch.dirty.bitmap[i]);
+    }
+
+    d->arch.dirty.bitmap_pages = nr_pages;
+    return 0;
+
+cleanup_on_failure:
+    nr_pages = i;
+    for ( i = 0; i < nr_pages; ++i )
+    {
+        unmap_domain_page_global(d->arch.dirty.bitmap[i]);
+    }
+
+    return -ENOMEM;
+}
+
+void cleanup_bitmap(struct domain *d)
+{
+    int i;
+
+    for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i )
+    {
+        unmap_domain_page_global(d->arch.dirty.bitmap[i]);
+    }
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index 403fd89..d57a44a 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -6,6 +6,8 @@ 
 #include <xen/bitops.h>
 #include <asm/flushtlb.h>
 #include <asm/gic.h>
+#include <xen/guest_access.h>
+#include <xen/pfn.h>
 #include <asm/event.h>
 #include <asm/hardirq.h>
 #include <asm/page.h>
@@ -208,6 +210,7 @@  static lpae_t mfn_to_p2m_entry(unsigned long mfn, unsigned int mattr,
         break;
 
     case p2m_ram_ro:
+    case p2m_ram_logdirty:
         e.p2m.xn = 0;
         e.p2m.write = 0;
         break;
@@ -261,6 +264,10 @@  static int p2m_create_table(struct domain *d,
 
     pte = mfn_to_p2m_entry(page_to_mfn(page), MATTR_MEM, p2m_invalid);
 
+    /* mark the write bit (page table's case, ro bit) as 0. So it is writable 
+     * in case of vlpt access */
+    pte.pt.ro = 0;
+
     write_pte(entry, pte);
 
     return 0;
@@ -697,6 +704,210 @@  unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
     return p >> PAGE_SHIFT;
 }
 
+/* Change types across all p2m entries in a domain */
+void p2m_change_entry_type_global(struct domain *d, enum mg nt)
+{
+    struct p2m_domain *p2m = &d->arch.p2m;
+    paddr_t ram_base;
+    int i1, i2, i3;
+    int first_index, second_index, third_index;
+    lpae_t *first = __map_domain_page(p2m->first_level);
+    lpae_t pte, *second = NULL, *third = NULL;
+
+    domain_get_gpfn_range(d, &ram_base, NULL);
+
+    first_index = first_table_offset((uint64_t)ram_base);
+    second_index = second_table_offset((uint64_t)ram_base);
+    third_index = third_table_offset((uint64_t)ram_base);
+
+    BUG_ON( !first && "Can't map first level p2m." );
+
+    spin_lock(&p2m->lock);
+
+    for ( i1 = first_index; i1 < LPAE_ENTRIES*2; ++i1 )
+    {
+        lpae_walk_t first_pte = first[i1].walk;
+
+        if ( !first_pte.valid || !first_pte.table )
+            goto out;
+
+        second = map_domain_page(first_pte.base);
+        BUG_ON( !second && "Can't map second level p2m.");
+
+        for ( i2 = second_index; i2 < LPAE_ENTRIES; ++i2 )
+        {
+            lpae_walk_t second_pte = second[i2].walk;
+
+            if ( !second_pte.valid || !second_pte.table )
+                goto out;
+
+            third = map_domain_page(second_pte.base);
+            BUG_ON( !third && "Can't map third level p2m.");
+
+            for ( i3 = third_index; i3 < LPAE_ENTRIES; ++i3 )
+            {
+
+                lpae_walk_t third_pte = third[i3].walk;
+                if ( !third_pte.valid )
+                    goto out;
+
+                pte = third[i3];
+                if ( nt == mg_ro )
+                {
+                    if ( pte.p2m.write == 1 )
+                    {
+                        pte.p2m.write = 0;
+                        pte.p2m.type = p2m_ram_logdirty;
+                    }
+                    else
+                    {
+                        /* reuse avail bit as an indicator of 'actual'
+                         * read-only */
+                        pte.p2m.type = p2m_ram_rw;
+                    }
+                }
+                else if ( nt == mg_rw )
+                {
+                    if ( pte.p2m.write == 0 &&
+                         pte.p2m.type == p2m_ram_logdirty )
+                    {
+                        pte.p2m.write = p2m_ram_rw;
+                    }
+                }
+                write_pte(&third[i3], pte);
+            }
+            unmap_domain_page(third);
+
+            third = NULL;
+            third_index = 0;
+        }
+        unmap_domain_page(second);
+
+        second = NULL;
+        second_index = 0;
+        third_index = 0;
+    }
+
+out:
+    flush_tlb_all_local();
+    if ( third ) unmap_domain_page(third);
+    if ( second ) unmap_domain_page(second);
+    if ( first ) unmap_domain_page(first);
+
+    spin_unlock(&p2m->lock);
+}
+
+/* Read a domain's log-dirty bitmap and stats.
+ * If the operation is a CLEAN, clear the bitmap and stats. */
+int log_dirty_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+    int peek = 1;
+    int i;
+    int bitmap_size;
+    paddr_t gma_start, gma_end;
+
+    /* this hypercall is called from domain 0, and we don't know which guest's
+     * vlpt is mapped in xen_second, so, to be sure, we restore vlpt here */
+    restore_vlpt(d);
+
+    domain_get_gpfn_range(d, &gma_start, &gma_end);
+    bitmap_size = (gma_end - gma_start) / 8;
+
+    if ( guest_handle_is_null(sc->dirty_bitmap) )
+    {
+        peek = 0;
+    }
+    else
+    {
+        spin_lock(&d->arch.dirty.lock);
+        for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i )
+        {
+            int j = 0;
+            uint8_t *bitmap;
+            copy_to_guest_offset(sc->dirty_bitmap, i * PAGE_SIZE,
+                                 d->arch.dirty.bitmap[i],
+                                 bitmap_size < PAGE_SIZE ? bitmap_size :
+                                                           PAGE_SIZE);
+            bitmap_size -= PAGE_SIZE;
+
+            /* set p2m page table read-only */
+            bitmap = d->arch.dirty.bitmap[i];
+            while ((j = find_next_bit((const long unsigned int *)bitmap,
+                                      PAGE_SIZE*8, j)) < PAGE_SIZE*8)
+            {
+                lpae_t *vlpt;
+                paddr_t addr = gma_start + (i << (2*PAGE_SHIFT+3)) +
+                    (j << PAGE_SHIFT);
+                vlpt = get_vlpt_3lvl_pte(addr);
+                vlpt->p2m.write = 0;
+                j++;
+            }
+        }
+
+        if ( sc->op == XEN_DOMCTL_SHADOW_OP_CLEAN )
+        {
+            for ( i = 0; i < d->arch.dirty.bitmap_pages; ++i )
+            {
+                clear_page(d->arch.dirty.bitmap[i]);
+            }
+        }
+
+        spin_unlock(&d->arch.dirty.lock);
+        flush_tlb_local();
+    }
+
+    sc->stats.dirty_count = d->arch.dirty.count;
+
+    return 0;
+}
+
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc)
+{
+    long ret = 0;
+    switch (sc->op)
+    {
+        case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+        case XEN_DOMCTL_SHADOW_OP_OFF:
+        {
+            enum mg nt = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? mg_rw : mg_ro;
+
+            d->arch.dirty.mode = sc->op == XEN_DOMCTL_SHADOW_OP_OFF ? 0 : 1;
+            p2m_change_entry_type_global(d, nt);
+
+            if ( sc->op == XEN_DOMCTL_SHADOW_OP_OFF )
+            {
+                cleanup_vlpt(d);
+                cleanup_bitmap(d);
+            }
+            else
+            {
+                if ( (ret = prepare_vlpt(d)) )
+                   return ret;
+
+                if ( (ret = prepare_bitmap(d)) )
+                {
+                   /* in case of failure, we have to cleanup vlpt */
+                   cleanup_vlpt(d);
+                   return ret;
+                }
+            }
+        }
+        break;
+
+        case XEN_DOMCTL_SHADOW_OP_CLEAN:
+        case XEN_DOMCTL_SHADOW_OP_PEEK:
+        {
+            ret = log_dirty_op(d, sc);
+        }
+        break;
+
+        default:
+            return -ENOSYS;
+    }
+
+    return ret;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/traps.c b/xen/arch/arm/traps.c
index a7edc4e..cca34e9 100644
--- a/xen/arch/arm/traps.c
+++ b/xen/arch/arm/traps.c
@@ -1491,6 +1491,8 @@  static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
     struct hsr_dabt dabt = hsr.dabt;
     int rc;
     mmio_info_t info;
+    int page_fault = ( (dabt.dfsc & FSC_MASK) ==
+                       (FSC_FLT_PERM | FSC_3D_LEVEL) && dabt.write );
 
     if ( !check_conditional_instr(regs, hsr) )
     {
@@ -1512,6 +1514,15 @@  static void do_trap_data_abort_guest(struct cpu_user_regs *regs,
     if ( rc == -EFAULT )
         goto bad_data_abort;
 
+    /* domU page fault handling for guest live migration. dabt.valid can be 
+     * 0 here.
+     */
+    if ( page_fault && handle_page_fault(current->domain, info.gpa) )
+    {
+        /* Do not modify pc after page fault to repeat memory operation */
+        return;
+    }
+
     /* XXX: Decode the instruction if ISS is not valid */
     if ( !dabt.valid )
         goto bad_data_abort;
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 5321bd6..99f9f51 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -163,9 +163,16 @@  struct arch_domain
 
     /* dirty-page tracing */
     struct {
+#define MAX_DIRTY_BITMAP_PAGES 64        /* support upto 8GB guest memory */
+        spinlock_t lock;                 /* protect list: head, mvn_head */
+        volatile int mode;               /* 1 if dirty pages tracing enabled */
+        volatile unsigned int count;     /* dirty pages counter */
         volatile int second_lvl_start;   /* for context switch */
         volatile int second_lvl_end;
         lpae_t *second_lvl[2];           /* copy of guest p2m's first */
+        /* dirty bitmap */
+        uint8_t *bitmap[MAX_DIRTY_BITMAP_PAGES];
+        int bitmap_pages;                /* number of dirty bitmap pages */
     } dirty;
 
     unsigned int evtchn_irq;
diff --git a/xen/include/asm-arm/mm.h b/xen/include/asm-arm/mm.h
index 5fd684f..5f9478b 100644
--- a/xen/include/asm-arm/mm.h
+++ b/xen/include/asm-arm/mm.h
@@ -343,10 +343,18 @@  static inline void put_page_and_type(struct page_info *page)
     put_page(page);
 }
 
+enum mg { mg_clear, mg_ro, mg_rw, mg_rx };
+
+/* routine for dirty-page tracing */
+int handle_page_fault(struct domain *d, paddr_t addr);
+
 int prepare_vlpt(struct domain *d);
 void cleanup_vlpt(struct domain *d);
 void restore_vlpt(struct domain *d);
 
+int prepare_bitmap(struct domain *d);
+void cleanup_bitmap(struct domain *d);
+
 /* calculate the xen's virtual address for accessing the leaf PTE of
  * a given address (GPA) */
 static inline lpae_t * get_vlpt_3lvl_pte(paddr_t addr)
@@ -359,6 +367,8 @@  static inline lpae_t * get_vlpt_3lvl_pte(paddr_t addr)
     return &table[addr >> PAGE_SHIFT];
 }
 
+void get_gma_start_end(struct domain *d, paddr_t *start, paddr_t *end);
+
 #endif /*  __ARCH_ARM_MM__ */
 /*
  * Local variables:
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index bd71abe..0cecbe7 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -2,6 +2,7 @@ 
 #define _XEN_P2M_H
 
 #include <xen/mm.h>
+#include <public/domctl.h>
 
 struct domain;
 
@@ -41,6 +42,7 @@  typedef enum {
     p2m_invalid = 0,    /* Nothing mapped here */
     p2m_ram_rw,         /* Normal read/write guest RAM */
     p2m_ram_ro,         /* Read-only; writes are silently dropped */
+    p2m_ram_logdirty,   /* Read-only: special mode for log dirty */
     p2m_mmio_direct,    /* Read/write mapping of genuine MMIO area */
     p2m_map_foreign,    /* Ram pages from foreign domain */
     p2m_grant_map_rw,   /* Read/write grant mapping */
@@ -49,7 +51,8 @@  typedef enum {
 } p2m_type_t;
 
 #define p2m_is_foreign(_t)  ((_t) == p2m_map_foreign)
-#define p2m_is_ram(_t)      ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro)
+#define p2m_is_ram(_t)      ((_t) == p2m_ram_rw || (_t) == p2m_ram_ro ||  \
+                             (_t) == p2m_ram_logdirty)
 
 /* Initialise vmid allocator */
 void p2m_vmid_allocator_init(void);
@@ -178,6 +181,9 @@  static inline int get_page_and_type(struct page_info *page,
     return rc;
 }
 
+void p2m_change_entry_type_global(struct domain *d, enum mg nt);
+long dirty_mode_op(struct domain *d, xen_domctl_shadow_op_t *sc);
+
 #endif /* _XEN_P2M_H */
 
 /*
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 06e638f..9dc49c3 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -399,6 +399,8 @@  union hsr {
 #define FSC_CPR        (0x3a) /* Coprocossor Abort */
 
 #define FSC_LL_MASK    (_AC(0x03,U)<<0)
+#define FSC_MASK       (0x3f) /* Fault status mask */
+#define FSC_3D_LEVEL   (0x03) /* Third level fault*/
 
 /* Time counter hypervisor control register */
 #define CNTHCTL_PA      (1u<<0)  /* Kernel/user access to physical counter */