diff mbox

[Xen-devel,v2] xen/arm: Add support for 16 bit VMIDs

Message ID 1480069983-21138-1-git-send-email-bhupinder.thakur@linaro.org
State New
Headers show

Commit Message

Bhupinder Thakur Nov. 25, 2016, 10:33 a.m. UTC
VMID space is increased to 16-bits from 8-bits in ARMv8 8.1 revision.
This allows more than 256 VMs to be supported by Xen.

This change adds support for 16-bit VMIDs in Xen based on whether the
architecture supports it.

Signed-off-by: Bhupinder Thakur <bhupinder.thakur@linaro.org>
Reviewed-by: Julien Grall <julien.grall@arm.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
---
 xen/arch/arm/p2m.c              | 68 ++++++++++++++++++++++++++++++++++++-----
 xen/arch/arm/setup.c            |  3 +-
 xen/include/asm-arm/p2m.h       |  4 +--
 xen/include/asm-arm/processor.h | 18 ++++++++++-
 4 files changed, 82 insertions(+), 11 deletions(-)

Comments

Julien Grall Nov. 29, 2016, 12:55 p.m. UTC | #1
Hi Bhupinder,

On 25/11/16 10:33, Bhupinder Thakur wrote:
> VMID space is increased to 16-bits from 8-bits in ARMv8 8.1 revision.
> This allows more than 256 VMs to be supported by Xen.
>
> This change adds support for 16-bit VMIDs in Xen based on whether the
> architecture supports it.
>
> Signed-off-by: Bhupinder Thakur <bhupinder.thakur@linaro.org>
> Reviewed-by: Julien Grall <julien.grall@arm.com>
> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>

The tag reviewed-by has a strong meaning, it means that the person has 
fully reviewed the code and he agrees with the modifications (i.e the 
code is good to be pushed). For more details see a good description in 
in Linux doc (see [1]).

In this case, neither Stefano nor I gave it. So please drop them until 
one of us formally gave him. (e.g Reviewed-by: Name <email>).

> ---
>  xen/arch/arm/p2m.c              | 68 ++++++++++++++++++++++++++++++++++++-----
>  xen/arch/arm/setup.c            |  3 +-
>  xen/include/asm-arm/p2m.h       |  4 +--
>  xen/include/asm-arm/processor.h | 18 ++++++++++-
>  4 files changed, 82 insertions(+), 11 deletions(-)
>
> diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
> index cc5634b..f036cfb 100644
> --- a/xen/arch/arm/p2m.c
> +++ b/xen/arch/arm/p2m.c
> @@ -7,6 +7,7 @@
>  #include <xen/vm_event.h>
>  #include <xen/monitor.h>
>  #include <xen/iocap.h>
> +#include <xen/xmalloc.h>
>  #include <public/vm_event.h>
>  #include <asm/flushtlb.h>
>  #include <asm/gic.h>
> @@ -19,6 +20,7 @@ static unsigned int __read_mostly p2m_root_order;
>  static unsigned int __read_mostly p2m_root_level;
>  #define P2M_ROOT_ORDER    p2m_root_order
>  #define P2M_ROOT_LEVEL p2m_root_level
> +static unsigned int __read_mostly max_vmid;

I would much prefer if max_vmid and MAX_VMID are defined together.

>  #else
>  /* First level P2M is alway 2 consecutive pages */
>  #define P2M_ROOT_LEVEL 1
> @@ -1219,7 +1221,7 @@ static int p2m_alloc_table(struct domain *d)
>
>      p2m->root = page;
>
> -    p2m->vttbr = page_to_maddr(p2m->root) | ((uint64_t)p2m->vmid & 0xff) << 48;
> +    p2m->vttbr = page_to_maddr(p2m->root) | ((uint64_t)p2m->vmid << 48);
>
>      /*
>       * Make sure that all TLBs corresponding to the new VMID are flushed
> @@ -1230,20 +1232,68 @@ static int p2m_alloc_table(struct domain *d)
>      return 0;
>  }
>
> -#define MAX_VMID 256
> +#define MAX_VMID_8_BIT  (1UL << 8)
> +#define MAX_VMID_16_BIT (1UL << 16)
> +
> +#ifdef CONFIG_ARM_64
> +#define MAX_VMID        max_vmid
> +#else
> +#define MAX_VMID        MAX_VMID_8_BIT
> +#endif
> +
>  #define INVALID_VMID 0 /* VMID 0 is reserved */
>
>  static spinlock_t vmid_alloc_lock = SPIN_LOCK_UNLOCKED;
>
>  /*
> - * VTTBR_EL2 VMID field is 8 bits. Using a bitmap here limits us to
> - * 256 concurrent domains.
> + * VTTBR_EL2 VMID field is 8 or 16 bits. Aarch64 supports 16-bit VMID.
> + * Using a bitmap here limits us to 256 or 65536 (for Aarch64) concurrent
> + * domains. The bitmap space will be allocated dynamically based on
> + * whether 8 or 16 bit VMIDs are supported.
>   */
> -static DECLARE_BITMAP(vmid_mask, MAX_VMID);
> +static unsigned long *vmid_mask=NULL;

Coding style here.

vmid_mask = NULL;

However, the NULL is not necessary here as global variable are 
initialized to 0 by default.

>
> -void p2m_vmid_allocator_init(void)
> +int p2m_vmid_allocator_init(void)
>  {
> -    set_bit(INVALID_VMID, vmid_mask);
> +    int ret=0;

Coding style;

int ret = 0;

> +
> +#ifdef CONFIG_ARM_64
> +
> +    unsigned int cpu;
> +
> +    /*
> +     * initialize max_vmid to 16 bits by default
> +     */
> +    max_vmid = MAX_VMID_16_BIT;

This could be done at the declaration of max_vmid.

> +
> +    /*
> +     * if any cpu does not support 16-bit VMID then restrict the
> +     * max VMIDs which can be allocated to 256
> +     */
> +    for_each_online_cpu ( cpu )
> +    {
> +        const struct cpuinfo_arm *info = &cpu_data[cpu];
> +
> +        if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
> +        {
> +            max_vmid = MAX_VMID_8_BIT;
> +            break;
> +        }
> +    }

I still think this is very confusing to consider 16-bit VMID by default 
as this is an enhancement in a newer architecture.

I would prefer to see the loop inverted, i.e checking whether all the 
CPUs support 16-bit and set max_vmid to 16 bit if supported.

If you disagree please explain why.

> +
> +#endif
> +
> +    /*
> +     * allocate space for vmid_mask based on max_vmid
> +     */
> +    vmid_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(MAX_VMID));
> +
> +    if ( vmid_mask )
> +        set_bit(INVALID_VMID, vmid_mask);
> +    else
> +        ret = -1;
> +
> +    return ret;
>  }
>
>  static int p2m_alloc_vmid(struct domain *d)
> @@ -1646,6 +1696,10 @@ void __init setup_virt_paging(void)
>
>      val |= VTCR_PS(pa_range);
>      val |= VTCR_TG0_4K;
> +
> +    /* set the VS bit only if 16 bit VMID is supported */
> +    if ( MAX_VMID == MAX_VMID_16_BIT )
> +        val |= VTCR_VS;
>      val |= VTCR_SL0(pa_range_info[pa_range].sl0);
>      val |= VTCR_T0SZ(pa_range_info[pa_range].t0sz);
>
> diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
> index 38eb888..e240b12 100644
> --- a/xen/arch/arm/setup.c
> +++ b/xen/arch/arm/setup.c
> @@ -789,7 +789,8 @@ void __init start_xen(unsigned long boot_phys_offset,
>
>      gic_init();
>
> -    p2m_vmid_allocator_init();
> +    if ( p2m_vmid_allocator_init() != 0 )
> +        panic("Could not allocate VMID bitmap space");

I am not sure why we have to initialize the VMID allocator far before 
setting up the stage-2 translation (see call setup_virt_paging).

Overall, VMID are part of stage-2 subsystem. So I think it would be 
better to move this call in setup_virt_paging.

With that you could take advantage of the for_each_online loop in 
setup_virt_paging and avoid to have go through again all CPUs.

So what I would like to see is:
    - Patch #1: move p2m_vmid_allocator_init in setup_virt_paging
    - Patch #2: Add support for 16 bit VMIDs

>
>      softirq_init();
>
> diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
> index fdb6b47..b998e69 100644
> --- a/xen/include/asm-arm/p2m.h
> +++ b/xen/include/asm-arm/p2m.h
> @@ -30,7 +30,7 @@ struct p2m_domain {
>      struct page_info *root;
>
>      /* Current VMID in use */
> -    uint8_t vmid;
> +    uint16_t vmid;
>
>      /* Current Translation Table Base Register for the p2m */
>      uint64_t vttbr;
> @@ -153,7 +153,7 @@ void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
>  }
>
>  /* Initialise vmid allocator */
> -void p2m_vmid_allocator_init(void);
> +int p2m_vmid_allocator_init(void);
>
>  /* Second stage paging setup, to be called on all CPUs */
>  void setup_virt_paging(void);
> diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
> index 15bf890..48ce59b 100644
> --- a/xen/include/asm-arm/processor.h
> +++ b/xen/include/asm-arm/processor.h
> @@ -215,6 +215,8 @@
>
>  #define VTCR_PS(x)      ((x)<<16)
>
> +#define VTCR_VS    	    (_AC(0x1,UL)<<19)
> +
>  #endif
>
>  #define VTCR_RES1       (_AC(1,UL)<<31)
> @@ -269,6 +271,11 @@
>  /* FSR long format */
>  #define FSRL_STATUS_DEBUG       (_AC(0x22,UL)<<0)
>
> +#ifdef CONFIG_ARM_64
> +#define MM64_VMID_8_BITS_SUPPORT    0x0
> +#define MM64_VMID_16_BITS_SUPPORT   0x2
> +#endif
> +
>  #ifndef __ASSEMBLY__
>
>  struct cpuinfo_arm {
> @@ -337,7 +344,16 @@ struct cpuinfo_arm {
>              unsigned long tgranule_64K:4;
>              unsigned long tgranule_4K:4;
>              unsigned long __res0:32;
> -       };
> +
> +            unsigned long hafdbs:4;
> +            unsigned long vmid_bits:4;
> +            unsigned long vh:4;
> +            unsigned long hpds:4;
> +            unsigned long lo:4;
> +            unsigned long pan:4;
> +            unsigned long __res1:8;
> +            unsigned long __res2:32;
> +        };
>      } mm64;
>
>      struct {
>

[1] https://www.kernel.org/doc/Documentation/SubmittingPatches
Bhupinder Thakur Nov. 30, 2016, 1:31 p.m. UTC | #2
Hi Julien,

>>
>> VMID space is increased to 16-bits from 8-bits in ARMv8 8.1 revision.
>> This allows more than 256 VMs to be supported by Xen.
>>
>> This change adds support for 16-bit VMIDs in Xen based on whether the
>> architecture supports it.
>>
>> Signed-off-by: Bhupinder Thakur <bhupinder.thakur@linaro.org>
>> Reviewed-by: Julien Grall <julien.grall@arm.com>
>> Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
>
>
> The tag reviewed-by has a strong meaning, it means that the person has fully reviewed the code and he agrees with the modifications (i.e the code is good to be pushed). For more details see a good description in in Linux doc (see [1]).
>
> In this case, neither Stefano nor I gave it. So please drop them until one of us formally gave him. (e.g Reviewed-by: Name <email>).
>

I will remove the reviewed-by tags. I will check the reference
mentioned by you for the guidelines.

>> @@ -19,6 +20,7 @@ static unsigned int __read_mostly p2m_root_order;
>>  static unsigned int __read_mostly p2m_root_level;
>>  #define P2M_ROOT_ORDER    p2m_root_order
>>  #define P2M_ROOT_LEVEL p2m_root_level
>> +static unsigned int __read_mostly max_vmid;
>
>
> I would much prefer if max_vmid and MAX_VMID are defined together.
>

will move the definitions together.

>
>> - * VTTBR_EL2 VMID field is 8 bits. Using a bitmap here limits us to
>> - * 256 concurrent domains.
>> + * VTTBR_EL2 VMID field is 8 or 16 bits. Aarch64 supports 16-bit VMID.
>> + * Using a bitmap here limits us to 256 or 65536 (for Aarch64) concurrent
>> + * domains. The bitmap space will be allocated dynamically based on
>> + * whether 8 or 16 bit VMIDs are supported.
>>   */
>> -static DECLARE_BITMAP(vmid_mask, MAX_VMID);
>> +static unsigned long *vmid_mask=NULL;
>
>
> Coding style here.
>
> vmid_mask = NULL;
>
> However, the NULL is not necessary here as global variable are initialized to 0 by default.
>
ok.

>>
>> -void p2m_vmid_allocator_init(void)
>> +int p2m_vmid_allocator_init(void)
>>  {
>> -    set_bit(INVALID_VMID, vmid_mask);
>> +    int ret=0;
>
>
> Coding style;
>
> int ret = 0;

ok.

>
>> +
>> +#ifdef CONFIG_ARM_64
>> +
>> +    unsigned int cpu;
>> +
>> +    /*
>> +     * initialize max_vmid to 16 bits by default
>> +     */
>> +    max_vmid = MAX_VMID_16_BIT;
>
>
> This could be done at the declaration of max_vmid.
>

ok.

>> +
>> +    /*
>> +     * if any cpu does not support 16-bit VMID then restrict the
>> +     * max VMIDs which can be allocated to 256
>> +     */
>> +    for_each_online_cpu ( cpu )
>> +    {
>> +        const struct cpuinfo_arm *info = &cpu_data[cpu];
>> +
>> +        if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
>> +        {
>> +            max_vmid = MAX_VMID_8_BIT;
>> +            break;
>> +        }
>> +    }
>
>
> I still think this is very confusing to consider 16-bit VMID by default as this is an enhancement in a newer architecture.
>
> I would prefer to see the loop inverted, i.e checking whether all the CPUs support 16-bit and set max_vmid to 16 bit if supported.
>
> If you disagree please explain why.
>
The reason for doing it this way was to avoid using another variable
which would tell whether the FOR loop ran to completion (only then the
max_vmid can be set to MAX_VMID_16_BIT). By inverting the check I
avoided that extra variable.

>
>> --- a/xen/arch/arm/setup.c
>> +++ b/xen/arch/arm/setup.c
>> @@ -789,7 +789,8 @@ void __init start_xen(unsigned long boot_phys_offset,
>>
>>      gic_init();
>>
>> -    p2m_vmid_allocator_init();
>> +    if ( p2m_vmid_allocator_init() != 0 )
>> +        panic("Could not allocate VMID bitmap space");
>
>
> I am not sure why we have to initialize the VMID allocator far before setting up the stage-2 translation (see call setup_virt_paging).
>
> Overall, VMID are part of stage-2 subsystem. So I think it would be better to move this call in setup_virt_paging.
>
> With that you could take advantage of the for_each_online loop in setup_virt_paging and avoid to have go through again all CPUs.
>
> So what I would like to see is:
>    - Patch #1: move p2m_vmid_allocator_init in setup_virt_paging
>    - Patch #2: Add support for 16 bit VMIDs

The VMIDs are allocated from arch_init_memory () also (via
domain_create ()), which happens before setup_virt_paging ().


Regards,
Bhupinder
Julien Grall Dec. 6, 2016, 3:44 p.m. UTC | #3
On 30/11/16 13:31, Bhupinder Thakur wrote:
> Hi Julien,

Hi Bhupinder,

Apologies for the late answer.

>>
>>> +
>>> +#ifdef CONFIG_ARM_64
>>> +
>>> +    unsigned int cpu;
>>> +
>>> +    /*
>>> +     * initialize max_vmid to 16 bits by default
>>> +     */
>>> +    max_vmid = MAX_VMID_16_BIT;
>>
>>
>> This could be done at the declaration of max_vmid.
>>
>
> ok.
>
>>> +
>>> +    /*
>>> +     * if any cpu does not support 16-bit VMID then restrict the
>>> +     * max VMIDs which can be allocated to 256
>>> +     */
>>> +    for_each_online_cpu ( cpu )
>>> +    {
>>> +        const struct cpuinfo_arm *info = &cpu_data[cpu];
>>> +
>>> +        if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
>>> +        {
>>> +            max_vmid = MAX_VMID_8_BIT;
>>> +            break;
>>> +        }
>>> +    }
>>
>>
>> I still think this is very confusing to consider 16-bit VMID by default as this is an enhancement in a newer architecture.
>>
>> I would prefer to see the loop inverted, i.e checking whether all the CPUs support 16-bit and set max_vmid to 16 bit if supported.
>>
>> If you disagree please explain why.
>>
> The reason for doing it this way was to avoid using another variable
> which would tell whether the FOR loop ran to completion (only then the
> max_vmid can be set to MAX_VMID_16_BIT). By inverting the check I
> avoided that extra variable.

I tend to prefer a more readable code even if it means to have a bit 
more code. This is more maintainable in long-term. In this specific,
I don't think avoiding an extra variable could justify this.

>
>>
>>> --- a/xen/arch/arm/setup.c
>>> +++ b/xen/arch/arm/setup.c
>>> @@ -789,7 +789,8 @@ void __init start_xen(unsigned long boot_phys_offset,
>>>
>>>      gic_init();
>>>
>>> -    p2m_vmid_allocator_init();
>>> +    if ( p2m_vmid_allocator_init() != 0 )
>>> +        panic("Could not allocate VMID bitmap space");
>>
>>
>> I am not sure why we have to initialize the VMID allocator far before setting up the stage-2 translation (see call setup_virt_paging).
>>
>> Overall, VMID are part of stage-2 subsystem. So I think it would be better to move this call in setup_virt_paging.
>>
>> With that you could take advantage of the for_each_online loop in setup_virt_paging and avoid to have go through again all CPUs.
>>
>> So what I would like to see is:
>>    - Patch #1: move p2m_vmid_allocator_init in setup_virt_paging
>>    - Patch #2: Add support for 16 bit VMIDs
>
> The VMIDs are allocated from arch_init_memory () also (via
> domain_create ()), which happens before setup_virt_paging ().

That's not correct. The domains allocated in arch_init_memory does not 
require to have stage-2 page table (see the DOMCRF_dummy flags). The 
first created domain requiring a VMID will be DOM0 that is initialized 
after setup_virt_paging is called.

Regards,
Bhupinder Thakur Dec. 9, 2016, 10:49 a.m. UTC | #4
Hi Julien,

On 6 December 2016 at 21:14, Julien Grall <julien.grall@arm.com> wrote:
>>>> +
>>>> +    /*
>>>> +     * if any cpu does not support 16-bit VMID then restrict the
>>>> +     * max VMIDs which can be allocated to 256
>>>> +     */
>>>> +    for_each_online_cpu ( cpu )
>>>> +    {
>>>> +        const struct cpuinfo_arm *info = &cpu_data[cpu];
>>>> +
>>>> +        if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
>>>> +        {
>>>> +            max_vmid = MAX_VMID_8_BIT;
>>>> +            break;
>>>> +        }
>>>> +    }
>>>
>>>
>>>
>>> I still think this is very confusing to consider 16-bit VMID by default
>>> as this is an enhancement in a newer architecture.
>>>
>>> I would prefer to see the loop inverted, i.e checking whether all the
>>> CPUs support 16-bit and set max_vmid to 16 bit if supported.
>>>
>>> If you disagree please explain why.
>>>
>> The reason for doing it this way was to avoid using another variable
>> which would tell whether the FOR loop ran to completion (only then the
>> max_vmid can be set to MAX_VMID_16_BIT). By inverting the check I
>> avoided that extra variable.
>
>
> I tend to prefer a more readable code even if it means to have a bit more
> code. This is more maintainable in long-term. In this specific,
> I don't think avoiding an extra variable could justify this.
>
I will modify the code accordingly.

>>
>>>
>>>> --- a/xen/arch/arm/setup.c
>>>> +++ b/xen/arch/arm/setup.c
>>>> @@ -789,7 +789,8 @@ void __init start_xen(unsigned long
>>>> boot_phys_offset,
>>>>
>>>>      gic_init();
>>>>
>>>> -    p2m_vmid_allocator_init();
>>>> +    if ( p2m_vmid_allocator_init() != 0 )
>>>> +        panic("Could not allocate VMID bitmap space");
>>>
>>>
>>>
>>> I am not sure why we have to initialize the VMID allocator far before
>>> setting up the stage-2 translation (see call setup_virt_paging).
>>>
>>> Overall, VMID are part of stage-2 subsystem. So I think it would be
>>> better to move this call in setup_virt_paging.
>>>
>>> With that you could take advantage of the for_each_online loop in
>>> setup_virt_paging and avoid to have go through again all CPUs.
>>>
>>> So what I would like to see is:
>>>    - Patch #1: move p2m_vmid_allocator_init in setup_virt_paging
>>>    - Patch #2: Add support for 16 bit VMIDs
>>
I believe the 2nd patch should be based on the first patch. So they
should be applied in that order only. Should I send these two patches
as a series like [patch 1/2] and [patch 2/2]?
>>
>> The VMIDs are allocated from arch_init_memory () also (via
>> domain_create ()), which happens before setup_virt_paging ().
>
>
> That's not correct. The domains allocated in arch_init_memory does not
> require to have stage-2 page table (see the DOMCRF_dummy flags). The first
> created domain requiring a VMID will be DOM0 that is initialized after
> setup_virt_paging is called.

Yes. I will move p2m_vmid_allocator_init() inside setup_virt_paging().
Julien Grall Dec. 9, 2016, 6:11 p.m. UTC | #5
On 09/12/16 10:49, Bhupinder Thakur wrote:
> Hi Julien,

Hi Bhupinder,

> On 6 December 2016 at 21:14, Julien Grall <julien.grall@arm.com> wrote:

>>>
>>>>
>>>>> --- a/xen/arch/arm/setup.c
>>>>> +++ b/xen/arch/arm/setup.c
>>>>> @@ -789,7 +789,8 @@ void __init start_xen(unsigned long
>>>>> boot_phys_offset,
>>>>>
>>>>>      gic_init();
>>>>>
>>>>> -    p2m_vmid_allocator_init();
>>>>> +    if ( p2m_vmid_allocator_init() != 0 )
>>>>> +        panic("Could not allocate VMID bitmap space");
>>>>
>>>>
>>>>
>>>> I am not sure why we have to initialize the VMID allocator far before
>>>> setting up the stage-2 translation (see call setup_virt_paging).
>>>>
>>>> Overall, VMID are part of stage-2 subsystem. So I think it would be
>>>> better to move this call in setup_virt_paging.
>>>>
>>>> With that you could take advantage of the for_each_online loop in
>>>> setup_virt_paging and avoid to have go through again all CPUs.
>>>>
>>>> So what I would like to see is:
>>>>    - Patch #1: move p2m_vmid_allocator_init in setup_virt_paging
>>>>    - Patch #2: Add support for 16 bit VMIDs
>>>
> I believe the 2nd patch should be based on the first patch. So they
> should be applied in that order only. Should I send these two patches
> as a series like [patch 1/2] and [patch 2/2]?

That's right.

Regards,
diff mbox

Patch

diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c
index cc5634b..f036cfb 100644
--- a/xen/arch/arm/p2m.c
+++ b/xen/arch/arm/p2m.c
@@ -7,6 +7,7 @@ 
 #include <xen/vm_event.h>
 #include <xen/monitor.h>
 #include <xen/iocap.h>
+#include <xen/xmalloc.h>
 #include <public/vm_event.h>
 #include <asm/flushtlb.h>
 #include <asm/gic.h>
@@ -19,6 +20,7 @@  static unsigned int __read_mostly p2m_root_order;
 static unsigned int __read_mostly p2m_root_level;
 #define P2M_ROOT_ORDER    p2m_root_order
 #define P2M_ROOT_LEVEL p2m_root_level
+static unsigned int __read_mostly max_vmid;
 #else
 /* First level P2M is alway 2 consecutive pages */
 #define P2M_ROOT_LEVEL 1
@@ -1219,7 +1221,7 @@  static int p2m_alloc_table(struct domain *d)
 
     p2m->root = page;
 
-    p2m->vttbr = page_to_maddr(p2m->root) | ((uint64_t)p2m->vmid & 0xff) << 48;
+    p2m->vttbr = page_to_maddr(p2m->root) | ((uint64_t)p2m->vmid << 48);
 
     /*
      * Make sure that all TLBs corresponding to the new VMID are flushed
@@ -1230,20 +1232,68 @@  static int p2m_alloc_table(struct domain *d)
     return 0;
 }
 
-#define MAX_VMID 256
+#define MAX_VMID_8_BIT  (1UL << 8)
+#define MAX_VMID_16_BIT (1UL << 16)
+
+#ifdef CONFIG_ARM_64
+#define MAX_VMID        max_vmid
+#else
+#define MAX_VMID        MAX_VMID_8_BIT
+#endif
+
 #define INVALID_VMID 0 /* VMID 0 is reserved */
 
 static spinlock_t vmid_alloc_lock = SPIN_LOCK_UNLOCKED;
 
 /*
- * VTTBR_EL2 VMID field is 8 bits. Using a bitmap here limits us to
- * 256 concurrent domains.
+ * VTTBR_EL2 VMID field is 8 or 16 bits. Aarch64 supports 16-bit VMID. 
+ * Using a bitmap here limits us to 256 or 65536 (for Aarch64) concurrent 
+ * domains. The bitmap space will be allocated dynamically based on 
+ * whether 8 or 16 bit VMIDs are supported.
  */
-static DECLARE_BITMAP(vmid_mask, MAX_VMID);
+static unsigned long *vmid_mask=NULL;
 
-void p2m_vmid_allocator_init(void)
+int p2m_vmid_allocator_init(void)
 {
-    set_bit(INVALID_VMID, vmid_mask);
+    int ret=0;
+
+#ifdef CONFIG_ARM_64
+
+    unsigned int cpu;
+
+    /*
+     * initialize max_vmid to 16 bits by default
+     */
+    max_vmid = MAX_VMID_16_BIT;
+
+    /* 
+     * if any cpu does not support 16-bit VMID then restrict the 
+     * max VMIDs which can be allocated to 256
+     */
+    for_each_online_cpu ( cpu )
+    {
+        const struct cpuinfo_arm *info = &cpu_data[cpu];
+
+        if ( info->mm64.vmid_bits != MM64_VMID_16_BITS_SUPPORT )
+        {
+            max_vmid = MAX_VMID_8_BIT;
+            break;
+        }
+    }
+
+#endif
+
+    /*
+     * allocate space for vmid_mask based on max_vmid
+     */
+    vmid_mask = xzalloc_array(unsigned long, BITS_TO_LONGS(MAX_VMID));
+
+    if ( vmid_mask )
+        set_bit(INVALID_VMID, vmid_mask);
+    else
+        ret = -1;
+
+    return ret;
 }
 
 static int p2m_alloc_vmid(struct domain *d)
@@ -1646,6 +1696,10 @@  void __init setup_virt_paging(void)
 
     val |= VTCR_PS(pa_range);
     val |= VTCR_TG0_4K;
+
+    /* set the VS bit only if 16 bit VMID is supported */
+    if ( MAX_VMID == MAX_VMID_16_BIT )
+        val |= VTCR_VS;
     val |= VTCR_SL0(pa_range_info[pa_range].sl0);
     val |= VTCR_T0SZ(pa_range_info[pa_range].t0sz);
 
diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c
index 38eb888..e240b12 100644
--- a/xen/arch/arm/setup.c
+++ b/xen/arch/arm/setup.c
@@ -789,7 +789,8 @@  void __init start_xen(unsigned long boot_phys_offset,
 
     gic_init();
 
-    p2m_vmid_allocator_init();
+    if ( p2m_vmid_allocator_init() != 0 )
+        panic("Could not allocate VMID bitmap space");
 
     softirq_init();
 
diff --git a/xen/include/asm-arm/p2m.h b/xen/include/asm-arm/p2m.h
index fdb6b47..b998e69 100644
--- a/xen/include/asm-arm/p2m.h
+++ b/xen/include/asm-arm/p2m.h
@@ -30,7 +30,7 @@  struct p2m_domain {
     struct page_info *root;
 
     /* Current VMID in use */
-    uint8_t vmid;
+    uint16_t vmid;
 
     /* Current Translation Table Base Register for the p2m */
     uint64_t vttbr;
@@ -153,7 +153,7 @@  void p2m_altp2m_check(struct vcpu *v, uint16_t idx)
 }
 
 /* Initialise vmid allocator */
-void p2m_vmid_allocator_init(void);
+int p2m_vmid_allocator_init(void);
 
 /* Second stage paging setup, to be called on all CPUs */
 void setup_virt_paging(void);
diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h
index 15bf890..48ce59b 100644
--- a/xen/include/asm-arm/processor.h
+++ b/xen/include/asm-arm/processor.h
@@ -215,6 +215,8 @@ 
 
 #define VTCR_PS(x)      ((x)<<16)
 
+#define VTCR_VS    	    (_AC(0x1,UL)<<19)
+
 #endif
 
 #define VTCR_RES1       (_AC(1,UL)<<31)
@@ -269,6 +271,11 @@ 
 /* FSR long format */
 #define FSRL_STATUS_DEBUG       (_AC(0x22,UL)<<0)
 
+#ifdef CONFIG_ARM_64
+#define MM64_VMID_8_BITS_SUPPORT    0x0
+#define MM64_VMID_16_BITS_SUPPORT   0x2
+#endif
+
 #ifndef __ASSEMBLY__
 
 struct cpuinfo_arm {
@@ -337,7 +344,16 @@  struct cpuinfo_arm {
             unsigned long tgranule_64K:4;
             unsigned long tgranule_4K:4;
             unsigned long __res0:32;
-       };
+
+            unsigned long hafdbs:4;
+            unsigned long vmid_bits:4;
+            unsigned long vh:4;
+            unsigned long hpds:4;
+            unsigned long lo:4;
+            unsigned long pan:4;
+            unsigned long __res1:8;
+            unsigned long __res2:32;
+        };
     } mm64;
 
     struct {