
[Xen-devel,v2,16/17] libxc/xc_dom_arm: Copy ACPI tables to guest space

Message ID 1466651824-6964-17-git-send-email-zhaoshenglong@huawei.com
State New

Commit Message

Shannon Zhao June 23, 2016, 3:17 a.m. UTC
From: Shannon Zhao <shannon.zhao@linaro.org>

Copy all the ACPI tables to guest space so that UEFI or guest could
access them.

Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
---
 tools/libxc/xc_dom_arm.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)

Comments

Julien Grall June 23, 2016, 6:46 p.m. UTC | #1
Hi Shannon,

On 23/06/2016 04:17, Shannon Zhao wrote:
> From: Shannon Zhao <shannon.zhao@linaro.org>
>
> Copy all the ACPI tables to guest space so that UEFI or guest could
> access them.
>
> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
> ---
>  tools/libxc/xc_dom_arm.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 51 insertions(+)
>
> diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c
> index 64a8b67..6a0a5b7 100644
> --- a/tools/libxc/xc_dom_arm.c
> +++ b/tools/libxc/xc_dom_arm.c
> @@ -63,6 +63,47 @@ static int setup_pgtables_arm(struct xc_dom_image *dom)
>
>  /* ------------------------------------------------------------------------ */
>
> +static int xc_dom_copy_acpi(struct xc_dom_image *dom)
> +{
> +    int rc, i;
> +    uint32_t pages_num = ROUNDUP(dom->acpitable_size, XC_PAGE_SHIFT) >>
> +                         XC_PAGE_SHIFT;
> +    const xen_pfn_t base = GUEST_ACPI_BASE >> XC_PAGE_SHIFT;
> +    xen_pfn_t *p2m;
> +    void *acpi_pages;
> +
> +    p2m = malloc(pages_num * sizeof(*p2m));
> +    for (i = 0; i < pages_num; i++)
> +        p2m[i] = base + i;
> +
> +    rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
> +                                          pages_num, 0, 0, p2m);

Hmmmm... it looks like this is working because libxl is setting the 
maximum size of the domain with some slack (1MB). However, I guess the 
slack was for something else. Wei, Stefano, Ian, can you confirm?

> +    if ( rc )
> +    {
> +        DOMPRINTF("%s: xc_domain_populate_physmap_exact failed with %d",
> +                  __FUNCTION__, rc);
> +        goto out;
> +    }
> +
> +    acpi_pages = xc_map_foreign_range(dom->xch, dom->guest_domid,
> +                                      PAGE_SIZE * pages_num,
> +                                      PROT_READ | PROT_WRITE, base);
> +    if ( !acpi_pages )
> +    {
> +        DOMPRINTF("%s Can't map acpi_pages", __FUNCTION__);
> +        rc = -1;
> +        goto out;
> +    }
> +
> +    memcpy(acpi_pages, dom->acpitable_blob, dom->acpitable_size);
> +
> +out:
> +    munmap(acpi_pages, pages_num * PAGE_SIZE);
> +    free(p2m);
> +
> +    return rc;
> +}
> +
>  static int alloc_magic_pages(struct xc_dom_image *dom)
>  {
>      int rc, i;
> @@ -100,6 +141,16 @@ static int alloc_magic_pages(struct xc_dom_image *dom)
>      xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_EVTCHN,
>              dom->xenstore_evtchn);
>
> +    if ( dom->acpitable_blob && dom->acpitable_size > 0 )
> +    {
> +        rc = xc_dom_copy_acpi(dom);
> +        if ( rc != 0 )
> +        {
> +            DOMPRINTF("Unable to copy ACPI tables");
> +            return rc;
> +        }
> +    }

alloc_magic_pages looks like the wrong place for this function. Any reason 
not to have generic ACPI blob loading in xc_dom_core.c, as we do for the 
devicetree?
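
For instance, a minimal loader in xc_dom_core.c, modelled on 
xc_dom_devicetree_mem(), could look roughly like the sketch below. This is 
only a sketch: the helper name is made up here and error reporting is 
reduced to a bare return code.

int xc_dom_acpitable_mem(struct xc_dom_image *dom,
                         const void *mem, size_t memsize)
{
    /* Keep a private copy of the ACPI blob in the generic dom image,
     * the same way the devicetree blob is handled, so that every
     * architecture can reuse it later during image building. */
    dom->acpitable_blob = xc_dom_malloc(dom, memsize);
    if ( dom->acpitable_blob == NULL )
        return -1;

    memcpy(dom->acpitable_blob, mem, memsize);
    dom->acpitable_size = memsize;

    return 0;
}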

Regards,
Julien Grall June 27, 2016, 10:49 a.m. UTC | #2
(CC Boris)

On 27/06/16 07:25, Shannon Zhao wrote:
>
>
> On 2016/6/24 2:46, Julien Grall wrote:
>>> +
>>>   static int alloc_magic_pages(struct xc_dom_image *dom)
>>>   {
>>      >      int rc, i;
>>> @@ -100,6 +141,16 @@ static int alloc_magic_pages(struct xc_dom_image
>>> *dom)
>>>       xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_EVTCHN,
>>>               dom->xenstore_evtchn);
>>>
>>> +    if ( dom->acpitable_blob && dom->acpitable_size > 0 )
>>> +    {
>>> +        rc = xc_dom_copy_acpi(dom);
>>> +        if ( rc != 0 )
>>> +        {
>>> +            DOMPRINTF("Unable to copy ACPI tables");
>>> +            return rc;
>>> +        }
>>> +    }
>>
>> alloc_magic_pages looks the wrong place with this function. Any reason
>> to not have a generic ACPI blob loading in xc_dom_core.c as we do for
>> devicetree?
> Looks like xc_dom_build_image is used for allocating pages in guest RAM
> while ACPI blob is not put in guest RAM.

The function xc_dom_build_image is used to load blobs into guest 
memory and to allocate other pages (such as the magic pages) which may 
not be part of the actual guest RAM.

The callback alloc_magic_pages is used to allocate architecture-specific 
pages. However, the ACPI blob is pretty much generic, so I am not sure 
why we would want to do this for ARM only.

Regards,
Julien Grall July 6, 2016, 9:46 a.m. UTC | #3
Hi Stefano,

On 05/07/16 18:13, Stefano Stabellini wrote:
> On Thu, 23 Jun 2016, Julien Grall wrote:
>> On 23/06/2016 04:17, Shannon Zhao wrote:
>>> From: Shannon Zhao <shannon.zhao@linaro.org>
>>>
>>> Copy all the ACPI tables to guest space so that UEFI or guest could
>>> access them.
>>>
>>> Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
>>> ---
>>>   tools/libxc/xc_dom_arm.c | 51
>>> ++++++++++++++++++++++++++++++++++++++++++++++++
>>>   1 file changed, 51 insertions(+)
>>>
>>> diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c
>>> index 64a8b67..6a0a5b7 100644
>>> --- a/tools/libxc/xc_dom_arm.c
>>> +++ b/tools/libxc/xc_dom_arm.c
>>> @@ -63,6 +63,47 @@ static int setup_pgtables_arm(struct xc_dom_image *dom)
>>>
>>>   /* ------------------------------------------------------------------------
>>> */
>>>
>>> +static int xc_dom_copy_acpi(struct xc_dom_image *dom)
>>> +{
>>> +    int rc, i;
>>> +    uint32_t pages_num = ROUNDUP(dom->acpitable_size, XC_PAGE_SHIFT) >>
>>> +                         XC_PAGE_SHIFT;
>>> +    const xen_pfn_t base = GUEST_ACPI_BASE >> XC_PAGE_SHIFT;
>>> +    xen_pfn_t *p2m;
>>> +    void *acpi_pages;
>>> +
>>> +    p2m = malloc(pages_num * sizeof(*p2m));
>>> +    for (i = 0; i < pages_num; i++)
>>> +        p2m[i] = base + i;
>>> +
>>> +    rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
>>> +                                          pages_num, 0, 0, p2m);
>>
>> Hmmmm... it looks like this is working because libxl is setting the maximum
>> size of the domain with some slack (1MB). However, I guess the slack was for
>> something else. Wei, Stefano, Ian, can you confirm?
>
> If I recall correctly, the slack is a magic value coming from the
> ancient history of toolstacks.

Does it mean we would need to update the slack to take into account the 
ACPI blob?

Cheers,
Julien Grall July 12, 2016, 9:25 a.m. UTC | #4
Hi Shannon,

On 12/07/16 04:47, Shannon Zhao wrote:
> On 2016/7/6 18:12, Stefano Stabellini wrote:
>> On Wed, 6 Jul 2016, Julien Grall wrote:
>>>> On 05/07/16 18:13, Stefano Stabellini wrote:
>>>>>> On Thu, 23 Jun 2016, Julien Grall wrote:
>>>>>>>> On 23/06/2016 04:17, Shannon Zhao wrote:
>>>>>>>>>> From: Shannon Zhao <shannon.zhao@linaro.org>
>>>>>>>>>> +static int xc_dom_copy_acpi(struct xc_dom_image *dom)
>>>>>>>>>> +{
>>>>>>>>>> +    int rc, i;
>>>>>>>>>> +    uint32_t pages_num = ROUNDUP(dom->acpitable_size, XC_PAGE_SHIFT) >>
>>>>>>>>>> +                         XC_PAGE_SHIFT;
>>>>>>>>>> +    const xen_pfn_t base = GUEST_ACPI_BASE >> XC_PAGE_SHIFT;
>>>>>>>>>> +    xen_pfn_t *p2m;
>>>>>>>>>> +    void *acpi_pages;
>>>>>>>>>> +
>>>>>>>>>> +    p2m = malloc(pages_num * sizeof(*p2m));
>>>>>>>>>> +    for (i = 0; i < pages_num; i++)
>>>>>>>>>> +        p2m[i] = base + i;
>>>>>>>>>> +
>>>>>>>>>> +    rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
>>>>>>>>>> +                                          pages_num, 0, 0, p2m);
>>>>>>>>
>>>>>>>> Hmmmm... it looks like this is working because libxl is setting the
>>>>>>>> maximum
>>>>>>>> size of the domain with some slack (1MB). However, I guess the slack was
>>>>>>>> for
>>>>>>>> something else. Wei, Stefano, Ian, can you confirm?
>>>>>>
>>>>>> If I recall correctly, the slack is a magic value coming from the
>>>>>> ancient history of toolstacks.
>>>>
>>>> Does it mean we would need to update the slack to take into account the ACPI
>>>> blob?
>> Yes, we need to take into account the ACPI blob. Probably not in the
>> slack but directly in mam_memkb.
> Sorry, I'm not sure understand this. I found the b_info->max_memkb but
> didn't find the slack you said. And how to fix this? Update
> b_info->max_memkb or the slack?

You can give a look to LIBXL_MAXMEM_CONSTANT.

I am not very familiar with libxl, so I will let Wei and Stefano give 
you advice on how to fix this.

Regards,
Shannon Zhao July 12, 2016, 2:42 p.m. UTC | #5
On 2016/07/12 19:35, Wei Liu wrote:
> On Tue, Jul 12, 2016 at 11:47:04AM +0800, Shannon Zhao wrote:
>> > 
>> > 
>> > On 2016/7/6 18:12, Stefano Stabellini wrote:
>>> > > On Wed, 6 Jul 2016, Julien Grall wrote:
>>>>> > >> > Hi Stefano,
>>>>> > >> > 
>>>>> > >> > On 05/07/16 18:13, Stefano Stabellini wrote:
>>>>>>> > >>> > > On Thu, 23 Jun 2016, Julien Grall wrote:
>>>>>>>>> > >>>> > > > On 23/06/2016 04:17, Shannon Zhao wrote:
>>>>>>>>>>> > >>>>> > > > > From: Shannon Zhao <shannon.zhao@linaro.org>
>>>>>>>>>>> > >>>>> > > > > 
>>>>>>>>>>> > >>>>> > > > > Copy all the ACPI tables to guest space so that UEFI or guest could
>>>>>>>>>>> > >>>>> > > > > access them.
>>>>>>>>>>> > >>>>> > > > > 
>>>>>>>>>>> > >>>>> > > > > Signed-off-by: Shannon Zhao <shannon.zhao@linaro.org>
>>>>>>>>>>> > >>>>> > > > > ---
>>>>>>>>>>> > >>>>> > > > >   tools/libxc/xc_dom_arm.c | 51
>>>>>>>>>>> > >>>>> > > > > ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>>>> > >>>>> > > > >   1 file changed, 51 insertions(+)
>>>>>>>>>>> > >>>>> > > > > 
>>>>>>>>>>> > >>>>> > > > > diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c
>>>>>>>>>>> > >>>>> > > > > index 64a8b67..6a0a5b7 100644
>>>>>>>>>>> > >>>>> > > > > --- a/tools/libxc/xc_dom_arm.c
>>>>>>>>>>> > >>>>> > > > > +++ b/tools/libxc/xc_dom_arm.c
>>>>>>>>>>> > >>>>> > > > > @@ -63,6 +63,47 @@ static int setup_pgtables_arm(struct xc_dom_image
>>>>>>>>>>> > >>>>> > > > > *dom)
>>>>>>>>>>> > >>>>> > > > > 
>>>>>>>>>>> > >>>>> > > > >   /*
>>>>>>>>>>> > >>>>> > > > > ------------------------------------------------------------------------
>>>>>>>>>>> > >>>>> > > > > */
>>>>>>>>>>> > >>>>> > > > > 
>>>>>>>>>>> > >>>>> > > > > +static int xc_dom_copy_acpi(struct xc_dom_image *dom)
>>>>>>>>>>> > >>>>> > > > > +{
>>>>>>>>>>> > >>>>> > > > > +    int rc, i;
>>>>>>>>>>> > >>>>> > > > > +    uint32_t pages_num = ROUNDUP(dom->acpitable_size, XC_PAGE_SHIFT) >>
>>>>>>>>>>> > >>>>> > > > > +                         XC_PAGE_SHIFT;
>>>>>>>>>>> > >>>>> > > > > +    const xen_pfn_t base = GUEST_ACPI_BASE >> XC_PAGE_SHIFT;
>>>>>>>>>>> > >>>>> > > > > +    xen_pfn_t *p2m;
>>>>>>>>>>> > >>>>> > > > > +    void *acpi_pages;
>>>>>>>>>>> > >>>>> > > > > +
>>>>>>>>>>> > >>>>> > > > > +    p2m = malloc(pages_num * sizeof(*p2m));
>>>>>>>>>>> > >>>>> > > > > +    for (i = 0; i < pages_num; i++)
>>>>>>>>>>> > >>>>> > > > > +        p2m[i] = base + i;
>>>>>>>>>>> > >>>>> > > > > +
>>>>>>>>>>> > >>>>> > > > > +    rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
>>>>>>>>>>> > >>>>> > > > > +                                          pages_num, 0, 0, p2m);
>>>>>>>>> > >>>> > > > 
>>>>>>>>> > >>>> > > > Hmmmm... it looks like this is working because libxl is setting the
>>>>>>>>> > >>>> > > > maximum
>>>>>>>>> > >>>> > > > size of the domain with some slack (1MB). However, I guess the slack was
>>>>>>>>> > >>>> > > > for
>>>>>>>>> > >>>> > > > something else. Wei, Stefano, Ian, can you confirm?
>>>>>>> > >>> > > 
>>>>>>> > >>> > > If I recall correctly, the slack is a magic value coming from the
>>>>>>> > >>> > > ancient history of toolstacks.
>>>>> > >> > 
>>>>> > >> > Does it mean we would need to update the slack to take into account the ACPI
>>>>> > >> > blob?
>>> > > Yes, we need to take into account the ACPI blob. Probably not in the
>>> > > slack but directly in mam_memkb.
>> > Sorry, I'm not sure understand this. I found the b_info->max_memkb but
>> > didn't find the slack you said. And how to fix this? Update
>> > b_info->max_memkb or the slack?
> Can you calculate the size of your payload and add that to max_memkb?
> 
Yeah, but the size will change if we change the tables in the future, 
and this should also consider x86, right?

Thanks,
Shannon Zhao July 12, 2016, 2:57 p.m. UTC | #6
On 2016/07/12 22:50, Wei Liu wrote:
> On Tue, Jul 12, 2016 at 10:42:07PM +0800, Shannon Zhao wrote:
>>>>>>>>>>> > >>>>> > >> > 
>>>>>>>>>>> > >>>>> > >> > Does it mean we would need to update the slack to take into account the ACPI
>>>>>>>>>>> > >>>>> > >> > blob?
>>>>>>> > >>> > > Yes, we need to take into account the ACPI blob. Probably not in the
>>>>>>> > >>> > > slack but directly in mam_memkb.
>>>>> > >> > Sorry, I'm not sure understand this. I found the b_info->max_memkb but
>>>>> > >> > didn't find the slack you said. And how to fix this? Update
>>>>> > >> > b_info->max_memkb or the slack?
>>> > > Can you calculate the size of your payload and add that to max_memkb?
>>> > > 
>> > Yeah, but the size will be changed if we change the tables in the future
>> > and this also should consider x86, right?
> That could easily be solved by introducing a function to calculate the
> size, right?
Oh, I'm not familiar with this. Let's clarify: we can add the size to 
max_memkb after generating the ACPI tables and before loading the tables 
into guest space, so it doesn't have to be added in 
libxl__domain_build_info_setdefault(), right?

Thanks,
Julien Grall July 12, 2016, 4:10 p.m. UTC | #7
Hello,

On 12/07/2016 16:08, Boris Ostrovsky wrote:
> On 07/12/2016 10:57 AM, Shannon Zhao wrote:
>> On 2016/07/12 22:50, Wei Liu wrote:
>>> On Tue, Jul 12, 2016 at 10:42:07PM +0800, Shannon Zhao wrote:
>>>>>>>>>>>>>>>>>>>>>>> Does it mean we would need to update the slack to take into account the ACPI
>>>>>>>>>>>>>>>>>>>>>>> blob?
>>>>>>>>>>>>>>> Yes, we need to take into account the ACPI blob. Probably not in the
>>>>>>>>>>>>>>> slack but directly in mam_memkb.
>>>>>>>>>>> Sorry, I'm not sure understand this. I found the b_info->max_memkb but
>>>>>>>>>>> didn't find the slack you said. And how to fix this? Update
>>>>>>>>>>> b_info->max_memkb or the slack?
>>>>>>> Can you calculate the size of your payload and add that to max_memkb?
>>>>>>>
>>>>> Yeah, but the size will be changed if we change the tables in the future
>>>>> and this also should consider x86, right?
>>> That could easily be solved by introducing a function to calculate the
>>> size, right?
>> Oh, I'm not familiar with this. Let's clarify on this. It can add the
>> size to max_memkb after generating the ACPI tables and before loading
>> the tables to guest space and it doesn't have to add the size at
>> libxl__domain_build_info_setdefault(), right?
>
> This was discussed before: ACPI tables are part of RAM whose size is
> specified by the config file (and is reflected in max_memkb I believe).
> It may not be presented to the guest as RAM (i.e. on x86 it is labeled
> by BIOS (or whoever) as a dedicated type in e820) but it still resides
> in DIMMs.

I don't think this was the conclusion of the thread. IMHO, "maxmem" is 
the amount of RAM a guest could effectively use.

Whilst the ACPI tables will be in the DIMMs from the host point of view, 
from a guest point of view they will be a ROM.

It will affect some other parts of the guest if we don't increment the 
"maxmem" requested by the user. For ARM, the ACPI blob will be exposed at 
a specific address that is outside of the guest RAM (see the guest 
memory layout in public/arch-arm.h).

We chose this solution over putting the tables in RAM because the ACPI 
tables are not easily relocatable (compared to the device tree, initrd 
and kernel), so we could not take advantage of superpages in both the 
stage-2 (hypervisor) and stage-1 (kernel) page tables.

To give you a concrete example: when the user requests 1 gigabyte of 
memory, Xen will try to allocate a 1 gigabyte superpage.

If a few kilobytes are removed from this 1 gigabyte, then we would have 
to use 2MB superpages, which will impact both performance and memory 
usage.
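
To put rough numbers on it (purely illustrative; assuming a 4KB granule, 
a 1GB guest and a 16KB blob carved out of that gigabyte):

#include <stdio.h>

/* Count the stage-2 entries needed to map 1GB of guest memory, with and
 * without a 16KB hole carved out of it for the ACPI tables. */
int main(void)
{
    unsigned long long ram  = 1ULL << 30;      /* 1GB of guest RAM    */
    unsigned long long hole = 16ULL << 10;     /* 16KB of ACPI tables */
    unsigned long long gb = 1ULL << 30, mb2 = 2ULL << 20, kb4 = 4096;

    printf("intact:    %llu x 1GB entries\n", ram / gb);

    /* The gigabyte containing the hole is shattered into 2MB blocks,
     * and the 2MB block containing the hole into 4KB pages. */
    printf("with hole: %llu x 1GB + %llu x 2MB + %llu x 4KB entries\n",
           ram / gb - 1, gb / mb2 - 1, (mb2 - hole) / kb4);

    return 0;
}

That is one stage-2 entry for the intact gigabyte against roughly a 
thousand (511 x 2MB + 508 x 4KB) once the hole is punched, with the 
matching increase in TLB pressure.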

So I think we should take into account the size of the ACPI blob in 
libxl and not reduce the memory requested by the user.

Regards,
Julien Grall July 13, 2016, 3:22 p.m. UTC | #8
Hello,

On 12/07/2016 17:58, Boris Ostrovsky wrote:
> On 07/12/2016 12:10 PM, Julien Grall wrote:
>> On 12/07/2016 16:08, Boris Ostrovsky wrote:
>>> On 07/12/2016 10:57 AM, Shannon Zhao wrote:
>>>> On 2016/07/12 22:50, Wei Liu wrote:
>>>>> On Tue, Jul 12, 2016 at 10:42:07PM +0800, Shannon Zhao wrote:
>>>>>>>>>>>>>>>>>>>>>>>>> Does it mean we would need to update the slack
>>>>>>>>>>>>>>>>>>>>>>>>> to take into account the ACPI
>>>>>>>>>>>>>>>>>>>>>>>>> blob?
>>>>>>>>>>>>>>>>> Yes, we need to take into account the ACPI blob.
>>>>>>>>>>>>>>>>> Probably not in the
>>>>>>>>>>>>>>>>> slack but directly in mam_memkb.
>>>>>>>>>>>>> Sorry, I'm not sure understand this. I found the
>>>>>>>>>>>>> b_info->max_memkb but
>>>>>>>>>>>>> didn't find the slack you said. And how to fix this? Update
>>>>>>>>>>>>> b_info->max_memkb or the slack?
>>>>>>>>> Can you calculate the size of your payload and add that to
>>>>>>>>> max_memkb?
>>>>>>>>>
>>>>>>> Yeah, but the size will be changed if we change the tables in the
>>>>>>> future
>>>>>>> and this also should consider x86, right?
>>>>> That could easily be solved by introducing a function to calculate the
>>>>> size, right?
>>>> Oh, I'm not familiar with this. Let's clarify on this. It can add the
>>>> size to max_memkb after generating the ACPI tables and before loading
>>>> the tables to guest space and it doesn't have to add the size at
>>>> libxl__domain_build_info_setdefault(), right?
>>>
>>> This was discussed before: ACPI tables are part of RAM whose size is
>>> specified by the config file (and is reflected in max_memkb I believe).
>>> It may not be presented to the guest as RAM (i.e. on x86 it is labeled
>>> by BIOS (or whoever) as a dedicated type in e820) but it still resides
>>> in DIMMs.
>>
>> I don't think this was the conclusion of the thread. IHMO, "maxmem" is
>> the amount of RAM a guest could effectively use.
>>
>> Whilst the ACPI tables will be in the DIMM from the host point of
>> view. From a guest point of view it will be a ROM.
>
> The config file specifies resources provided by the host. How the guest
> views those resources is not important, I think.

This would need to be clarified. For instance, the special pages (Xenstore, 
console, ...) are RAM from the host point of view but are not taken into 
account in the "maxmem" provided by the user. From my understanding, some 
kB of the slack are used for that.

>
>>
>> It will affect some others part of the guest if we don't increment the
>> "maxmem" requested by the user. For ARM the ACPI blob will be exposed
>> at a specific address that is outside of the guest RAM (see the guest
>> memory layout in public/arch-arm.h).
>>
>> We chose this solution over putting in the RAM because the ACPI tables
>> are not easily relocatable (compare to the device tree, initrd and
>> kernel) so we could not take advantage of superpage in both stage-2
>> (hypervisor) and stage-1 (kernel) page table.
>
> Maybe this is something ARM-specific then. For x86 we will want to keep
> maxmem unchanged.

I don't think what I described in my previous mail is ARM-specific. The 
pressure on the TLBs will be higher if Xen does not use superpages in the 
stage-2 page tables (i.e. EPT for x86), no matter the architecture.

IMHO, this seems to be a bigger drawback compared to adding a few more 
kilobytes to maxmem in the toolstack for the ACPI blob. You will lose 
them when creating the intermediate page tables in any case.

May I ask why you want to keep maxmem unchanged on x86?

Regards,
Julien Grall July 14, 2016, 11:29 a.m. UTC | #9
Hello,

On 13/07/16 18:08, Boris Ostrovsky wrote:
> On 07/13/2016 11:22 AM, Julien Grall wrote:
>> On 12/07/2016 17:58, Boris Ostrovsky wrote:
>>> The config file specifies resources provided by the host. How the guest
>>> views those resources is not important, I think.
>>
>> This would need to be clarified. For instance special pages (Xenstore,
>> Console...) are RAM from the host point of view but not taken into
>> account in the "maxmem" provided by the user. For my understanding,
>> some kB of the slack is used for that.
>
>
> Are these pages part of guest's address space?

Yes, they are used to communicate between the guest and the backend 
(xenconsole, xenstore,...).

Regards,
Julien Grall July 20, 2016, 12:33 p.m. UTC | #10
Hi,

On 14/07/16 14:37, Stefano Stabellini wrote:
> On Wed, 13 Jul 2016, Julien Grall wrote:
>> Hello,
>>
>> On 12/07/2016 17:58, Boris Ostrovsky wrote:
>>> On 07/12/2016 12:10 PM, Julien Grall wrote:
>>>> On 12/07/2016 16:08, Boris Ostrovsky wrote:
>>>>> On 07/12/2016 10:57 AM, Shannon Zhao wrote:
>>>> It will affect some others part of the guest if we don't increment the
>>>> "maxmem" requested by the user. For ARM the ACPI blob will be exposed
>>>> at a specific address that is outside of the guest RAM (see the guest
>>>> memory layout in public/arch-arm.h).
>>>>
>>>> We chose this solution over putting in the RAM because the ACPI tables
>>>> are not easily relocatable (compare to the device tree, initrd and
>>>> kernel) so we could not take advantage of superpage in both stage-2
>>>> (hypervisor) and stage-1 (kernel) page table.
>>>
>>> Maybe this is something ARM-specific then. For x86 we will want to keep
>>> maxmem unchanged.
>>
>> I don't think what I described in my previous mail is ARM-specific. The
>> pressure will be more important on the TLBs, if Xen does not use superpage in
>> the stage 2 page tables (i.e EPT for x86) no matter the architecture.
>>
>> IHMO, this seems to be a bigger drawback compare to add few more kilobytes to
>> maxmem in the toolstack for the ACPI blob. You will loose them when creating
>> the intermediate page table in any case.
>
> I agree with Julien. On ARM we have to increase maxmem because I don't
> think that shattering a superpage is acceptable for just a few KBs. In
> fact, it's not much about increasing maxmem, but it's about keeping the
> allocation of guest memory to the value passed by the user in "memory",
> so that it can be done in the most efficient way possible. (I am
> assuming users are going to allocate VMs of 2048MB, rather than 2049MB.)
>
> I wouldn't want to end up adding to the performance tuning page on the
> wiki "Make sure to add 1 more MB to the memory of your VM to get the
> most out of the system."
>
> I know that the location of the ACPI blob on x86 is different in guest
> memory space, but it seems to me that the problem would be the same. Do
> you have 1 gigabyte pages in stage-2 on x86? If so, I would think twice
> about this. Otherwise, if you only have 4K and 2MB allocations, then it
> might not make that much of a difference.

Looking at the x86 code, 1 gigabyte pages seem to be supported.

Boris, do you have any opinions on this?

Regards,
Julien Grall July 20, 2016, 1:41 p.m. UTC | #11
On 20/07/2016 14:33, Boris Ostrovsky wrote:
> On 07/20/2016 08:33 AM, Julien Grall wrote:
>> Hi,
>>
>> On 14/07/16 14:37, Stefano Stabellini wrote:
>>> On Wed, 13 Jul 2016, Julien Grall wrote:
>>>> Hello,
>>>>
>>>> On 12/07/2016 17:58, Boris Ostrovsky wrote:
>>>>> On 07/12/2016 12:10 PM, Julien Grall wrote:
>>>>>> On 12/07/2016 16:08, Boris Ostrovsky wrote:
>>>>>>> On 07/12/2016 10:57 AM, Shannon Zhao wrote:
>>>>>> It will affect some others part of the guest if we don't increment
>>>>>> the
>>>>>> "maxmem" requested by the user. For ARM the ACPI blob will be exposed
>>>>>> at a specific address that is outside of the guest RAM (see the guest
>>>>>> memory layout in public/arch-arm.h).
>>>>>>
>>>>>> We chose this solution over putting in the RAM because the ACPI
>>>>>> tables
>>>>>> are not easily relocatable (compare to the device tree, initrd and
>>>>>> kernel) so we could not take advantage of superpage in both stage-2
>>>>>> (hypervisor) and stage-1 (kernel) page table.
>>>>>
>>>>> Maybe this is something ARM-specific then. For x86 we will want to
>>>>> keep
>>>>> maxmem unchanged.
>>>>
>>>> I don't think what I described in my previous mail is ARM-specific. The
>>>> pressure will be more important on the TLBs, if Xen does not use
>>>> superpage in
>>>> the stage 2 page tables (i.e EPT for x86) no matter the architecture.
>>>>
>>>> IHMO, this seems to be a bigger drawback compare to add few more
>>>> kilobytes to
>>>> maxmem in the toolstack for the ACPI blob. You will loose them when
>>>> creating
>>>> the intermediate page table in any case.
>>>
>>> I agree with Julien. On ARM we have to increase maxmem because I don't
>>> think that shattering a superpage is acceptable for just a few KBs. In
>>> fact, it's not much about increasing maxmem, but it's about keeping the
>>> allocation of guest memory to the value passed by the user in "memory",
>>> so that it can be done in the most efficient way possible. (I am
>>> assuming users are going to allocate VMs of 2048MB, rather than 2049MB.)
>>>
>>> I wouldn't want to end up adding to the performance tuning page on the
>>> wiki "Make sure to add 1 more MB to the memory of your VM to get the
>>> most out of the system."
>>>
>>> I know that the location of the ACPI blob on x86 is different in guest
>>> memory space, but it seems to me that the problem would be the same. Do
>>> you have 1 gigabyte pages in stage-2 on x86? If so, I would think twice
>>> about this. Otherwise, if you only have 4K and 2MB allocations, then it
>>> might not make that much of a difference.
>>
>> Looking at the x86 code, 1 gigabyte pages seems to be supported.
>>
>> Boris, do you have any opinions on this?
>
>
> I don't think I understand the superpage shattering argument.  In x86
> the tables live in regular RAM and a guest is free to use those
> addresses as regular memory.
>
> This apparently is different from how ARM manages the tables (you said
> in an earlier message that they are not part of RAM) so I can see that
> taking memory from RAM allocation to store the tables may affect how
> mapping is done, potentially causing GB pages to be broken.
>
> In fact (and I am totally speculating here) padding memory for x86 may
> actually *cause* shattering because we will have (for example) 2049MB of
> RAM to deal with.

Correct me if I am wrong. In your series you are populating the pages at 
a specific address for the ACPI tables, separately from the RAM allocation. 
So you will shatter GB pages if the user provides 2048MB, because the 
ACPI tables are accounted within the 2048MB.

The plan is to call setmaxmem with the size of the RAM plus the size of the 
ACPI tables. The RAM will still be 2048MB and therefore GB pages may be used.
Julien Grall July 21, 2016, 6:23 p.m. UTC | #12
Hi Stefano,

On 21/07/16 18:53, Stefano Stabellini wrote:
> On Wed, 20 Jul 2016, Boris Ostrovsky wrote:
>> On 07/20/2016 01:28 PM, Stefano Stabellini wrote:
>>> On Wed, 20 Jul 2016, Boris Ostrovsky wrote:
>>>> On 07/20/2016 09:41 AM, Julien Grall wrote:
>>>>>
>>>>> On 20/07/2016 14:33, Boris Ostrovsky wrote:
>>>>>> On 07/20/2016 08:33 AM, Julien Grall wrote:
>>>>>>> Hi,
>>>>>>>
>>>>>>> On 14/07/16 14:37, Stefano Stabellini wrote:
>>>>>>>> On Wed, 13 Jul 2016, Julien Grall wrote:
>>>>>>>>> Hello,
>>>>>>>>>
>>>>>>>>> On 12/07/2016 17:58, Boris Ostrovsky wrote:
>>>>>>>>>> On 07/12/2016 12:10 PM, Julien Grall wrote:
>>>>>>>>>>> On 12/07/2016 16:08, Boris Ostrovsky wrote:
>>>>>>>>>>>> On 07/12/2016 10:57 AM, Shannon Zhao wrote:
>>>>>>>>>>> It will affect some others part of the guest if we don't increment
>>>>>>>>>>> the
>>>>>>>>>>> "maxmem" requested by the user. For ARM the ACPI blob will be
>>>>>>>>>>> exposed
>>>>>>>>>>> at a specific address that is outside of the guest RAM (see the
>>>>>>>>>>> guest
>>>>>>>>>>> memory layout in public/arch-arm.h).
>>>>>>>>>>>
>>>>>>>>>>> We chose this solution over putting in the RAM because the ACPI
>>>>>>>>>>> tables
>>>>>>>>>>> are not easily relocatable (compare to the device tree, initrd and
>>>>>>>>>>> kernel) so we could not take advantage of superpage in both stage-2
>>>>>>>>>>> (hypervisor) and stage-1 (kernel) page table.
>>>>>>>>>> Maybe this is something ARM-specific then. For x86 we will want to
>>>>>>>>>> keep
>>>>>>>>>> maxmem unchanged.
>>>>>>>>> I don't think what I described in my previous mail is
>>>>>>>>> ARM-specific. The
>>>>>>>>> pressure will be more important on the TLBs, if Xen does not use
>>>>>>>>> superpage in
>>>>>>>>> the stage 2 page tables (i.e EPT for x86) no matter the architecture.
>>>>>>>>>
>>>>>>>>> IHMO, this seems to be a bigger drawback compare to add few more
>>>>>>>>> kilobytes to
>>>>>>>>> maxmem in the toolstack for the ACPI blob. You will loose them when
>>>>>>>>> creating
>>>>>>>>> the intermediate page table in any case.
>>>>>>>> I agree with Julien. On ARM we have to increase maxmem because I don't
>>>>>>>> think that shattering a superpage is acceptable for just a few KBs. In
>>>>>>>> fact, it's not much about increasing maxmem, but it's about keeping
>>>>>>>> the
>>>>>>>> allocation of guest memory to the value passed by the user in
>>>>>>>> "memory",
>>>>>>>> so that it can be done in the most efficient way possible. (I am
>>>>>>>> assuming users are going to allocate VMs of 2048MB, rather than
>>>>>>>> 2049MB.)
>>>>>>>>
>>>>>>>> I wouldn't want to end up adding to the performance tuning page on the
>>>>>>>> wiki "Make sure to add 1 more MB to the memory of your VM to get the
>>>>>>>> most out of the system."
>>>>>>>>
>>>>>>>> I know that the location of the ACPI blob on x86 is different in guest
>>>>>>>> memory space, but it seems to me that the problem would be the
>>>>>>>> same. Do
>>>>>>>> you have 1 gigabyte pages in stage-2 on x86? If so, I would think
>>>>>>>> twice
>>>>>>>> about this. Otherwise, if you only have 4K and 2MB allocations,
>>>>>>>> then it
>>>>>>>> might not make that much of a difference.
>>>>>>> Looking at the x86 code, 1 gigabyte pages seems to be supported.
>>>>>>>
>>>>>>> Boris, do you have any opinions on this?
>>>>>>
>>>>>> I don't think I understand the superpage shattering argument.  In x86
>>>>>> the tables live in regular RAM and a guest is free to use those
>>>>>> addresses as regular memory.
>>>>>>
>>>>>> This apparently is different from how ARM manages the tables (you said
>>>>>> in an earlier message that they are not part of RAM) so I can see that
>>>>>> taking memory from RAM allocation to store the tables may affect how
>>>>>> mapping is done, potentially causing GB pages to be broken.
>>>>>>
>>>>>> In fact (and I am totally speculating here) padding memory for x86 may
>>>>>> actually *cause* shattering because we will have (for example) 2049MB of
>>>>>> RAM to deal with.
>>>>> Correct me if I am wrong. On your series you are populating the page
>>>>> at a specific address for the ACPI tables separately to the RAM
>>>>> allocation. So you will shatter GB pages if the user provides 2048MB
>>>>> because the ACPI tables is accounted in the 2048MB.
>>>> And to be honest I am not convinced this was a well selected address
>>>> (0xfc000000). I am actually thinking about moving it down (this may
>>>> require changing dsdt.asl). I don't know whether I will actually do it
>>>> in this version but it is certainly a possibility.
>>> I don't understand how this statement fits in the discussion.
>>>
>>> The memory allocation for the ACPI blob is done by the toolstack
>>> separately from the rest of guest memory, leading to two separate
>>> stage-2 pagetable allocations of less than 1-gigabyte pages. Is that
>>> correct?
>>
>>
>> If I move the table lower into memory we won't have to do any extra
>> allocation. The memory will have been already allocated for the guest,
>> we just map it and copy the tables.
>
> I see, thanks for the explanation. I think this could work for ARM too
> and should avoid the stage-2 shattering issue described above.

But you will end up with a stage-1 shattering issue if you put the ACPI 
tables lower in the guest RAM, reducing the overall performance. That is 
why I first asked Shannon to put the ACPI tables outside of the guest RAM.

> Julien, what do you think? I agree that having the ACPI blob separate
> would be cleaner, but using the same allocation scheme as x86 is
> important.

While I agree that having the same scheme is important, I care a lot 
more about performance.

So far, I have concerns about performance with all of Boris's suggestions. 
I believe that the impact is the same on x86. However, it seems that it is 
not important for the x86 folks.

I think the scheme I suggested is the best one, because it will maximize 
the theoretical performance of the guests (a guest is free not to use 
superpages).

I could be convinced to put the ACPI tables at the end of the guest RAM, 
although this would require more code in the toolstack because the ACPI 
base address will not be static anymore.

Regards,
Julien Grall July 21, 2016, 7:14 p.m. UTC | #13
On 21/07/2016 19:54, Stefano Stabellini wrote:
> On Thu, 21 Jul 2016, Julien Grall wrote:
>> Hi Stefano,
>>
>> On 21/07/16 18:53, Stefano Stabellini wrote:
>>> On Wed, 20 Jul 2016, Boris Ostrovsky wrote:
>>>> On 07/20/2016 01:28 PM, Stefano Stabellini wrote:
>>>>> On Wed, 20 Jul 2016, Boris Ostrovsky wrote:
>>>>>> On 07/20/2016 09:41 AM, Julien Grall wrote:
>>>>>>>
>>>>>>> On 20/07/2016 14:33, Boris Ostrovsky wrote:
>>>>>>>> On 07/20/2016 08:33 AM, Julien Grall wrote:
>>>>>>>>> Hi,
>>>>>>>>>
>>>>>>>>> On 14/07/16 14:37, Stefano Stabellini wrote:
>>>>>>>>>> On Wed, 13 Jul 2016, Julien Grall wrote:
>>>>>>>>>>> Hello,
>>>>>>>>>>>
>>>>>>>>>>> On 12/07/2016 17:58, Boris Ostrovsky wrote:
>>>>>>>>>>>> On 07/12/2016 12:10 PM, Julien Grall wrote:
>>>>>>>>>>>>> On 12/07/2016 16:08, Boris Ostrovsky wrote:
>>>>>>>>>>>>>> On 07/12/2016 10:57 AM, Shannon Zhao wrote:
>>>>>>>>>>>>> It will affect some others part of the guest if we don't
>>>>>>>>>>>>> increment
>>>>>>>>>>>>> the
>>>>>>>>>>>>> "maxmem" requested by the user. For ARM the ACPI blob
>>>>>>>>>>>>> will be
>>>>>>>>>>>>> exposed
>>>>>>>>>>>>> at a specific address that is outside of the guest RAM
>>>>>>>>>>>>> (see the
>>>>>>>>>>>>> guest
>>>>>>>>>>>>> memory layout in public/arch-arm.h).
>>>>>>>>>>>>>
>>>>>>>>>>>>> We chose this solution over putting in the RAM because
>>>>>>>>>>>>> the ACPI
>>>>>>>>>>>>> tables
>>>>>>>>>>>>> are not easily relocatable (compare to the device tree,
>>>>>>>>>>>>> initrd and
>>>>>>>>>>>>> kernel) so we could not take advantage of superpage in
>>>>>>>>>>>>> both stage-2
>>>>>>>>>>>>> (hypervisor) and stage-1 (kernel) page table.
>>>>>>>>>>>> Maybe this is something ARM-specific then. For x86 we will
>>>>>>>>>>>> want to
>>>>>>>>>>>> keep
>>>>>>>>>>>> maxmem unchanged.
>>>>>>>>>>> I don't think what I described in my previous mail is
>>>>>>>>>>> ARM-specific. The
>>>>>>>>>>> pressure will be more important on the TLBs, if Xen does not
>>>>>>>>>>> use
>>>>>>>>>>> superpage in
>>>>>>>>>>> the stage 2 page tables (i.e EPT for x86) no matter the
>>>>>>>>>>> architecture.
>>>>>>>>>>>
>>>>>>>>>>> IHMO, this seems to be a bigger drawback compare to add few
>>>>>>>>>>> more
>>>>>>>>>>> kilobytes to
>>>>>>>>>>> maxmem in the toolstack for the ACPI blob. You will loose
>>>>>>>>>>> them when
>>>>>>>>>>> creating
>>>>>>>>>>> the intermediate page table in any case.
>>>>>>>>>> I agree with Julien. On ARM we have to increase maxmem because
>>>>>>>>>> I don't
>>>>>>>>>> think that shattering a superpage is acceptable for just a few
>>>>>>>>>> KBs. In
>>>>>>>>>> fact, it's not much about increasing maxmem, but it's about
>>>>>>>>>> keeping
>>>>>>>>>> the
>>>>>>>>>> allocation of guest memory to the value passed by the user in
>>>>>>>>>> "memory",
>>>>>>>>>> so that it can be done in the most efficient way possible. (I
>>>>>>>>>> am
>>>>>>>>>> assuming users are going to allocate VMs of 2048MB, rather
>>>>>>>>>> than
>>>>>>>>>> 2049MB.)
>>>>>>>>>>
>>>>>>>>>> I wouldn't want to end up adding to the performance tuning
>>>>>>>>>> page on the
>>>>>>>>>> wiki "Make sure to add 1 more MB to the memory of your VM to
>>>>>>>>>> get the
>>>>>>>>>> most out of the system."
>>>>>>>>>>
>>>>>>>>>> I know that the location of the ACPI blob on x86 is different
>>>>>>>>>> in guest
>>>>>>>>>> memory space, but it seems to me that the problem would be the
>>>>>>>>>> same. Do
>>>>>>>>>> you have 1 gigabyte pages in stage-2 on x86? If so, I would
>>>>>>>>>> think
>>>>>>>>>> twice
>>>>>>>>>> about this. Otherwise, if you only have 4K and 2MB
>>>>>>>>>> allocations,
>>>>>>>>>> then it
>>>>>>>>>> might not make that much of a difference.
>>>>>>>>> Looking at the x86 code, 1 gigabyte pages seems to be supported.
>>>>>>>>>
>>>>>>>>> Boris, do you have any opinions on this?
>>>>>>>>
>>>>>>>> I don't think I understand the superpage shattering argument.  In
>>>>>>>> x86
>>>>>>>> the tables live in regular RAM and a guest is free to use those
>>>>>>>> addresses as regular memory.
>>>>>>>>
>>>>>>>> This apparently is different from how ARM manages the tables (you
>>>>>>>> said
>>>>>>>> in an earlier message that they are not part of RAM) so I can see
>>>>>>>> that
>>>>>>>> taking memory from RAM allocation to store the tables may affect
>>>>>>>> how
>>>>>>>> mapping is done, potentially causing GB pages to be broken.
>>>>>>>>
>>>>>>>> In fact (and I am totally speculating here) padding memory for x86
>>>>>>>> may
>>>>>>>> actually *cause* shattering because we will have (for example)
>>>>>>>> 2049MB of
>>>>>>>> RAM to deal with.
>>>>>>> Correct me if I am wrong. On your series you are populating the page
>>>>>>> at a specific address for the ACPI tables separately to the RAM
>>>>>>> allocation. So you will shatter GB pages if the user provides 2048MB
>>>>>>> because the ACPI tables is accounted in the 2048MB.
>>>>>> And to be honest I am not convinced this was a well selected address
>>>>>> (0xfc000000). I am actually thinking about moving it down (this may
>>>>>> require changing dsdt.asl). I don't know whether I will actually do it
>>>>>> in this version but it is certainly a possibility.
>>>>> I don't understand how this statement fits in the discussion.
>>>>>
>>>>> The memory allocation for the ACPI blob is done by the toolstack
>>>>> separately from the rest of guest memory, leading to two separate
>>>>> stage-2 pagetable allocations of less than 1-gigabyte pages. Is that
>>>>> correct?
>>>>
>>>>
>>>> If I move the table lower into memory we won't have to do any extra
>>>> allocation. The memory will have been already allocated for the guest,
>>>> we just map it and copy the tables.
>>>
>>> I see, thanks for the explanation. I think this could work for ARM too
>>> and should avoid the stage-2 shattering issue described above.
>>
>> But you will end up to have stage-1 shattering issue if you put the ACPI
>> tables lower into the guest RAM, reducing the overall performance. It is why I
>> first asked Shannon to put the ACPI outside of the guest RAM.
>
> I am not sure about this actually: even with the ACPI blob in the middle
> of guest memory, the guest OS could still use a single superpage for its
> own stage-1 memory mappings. I don't know if Linux does it that way, but
> it should be possible.

You are assuming that the guest will map the ACPI blob with the same 
attributes as the rest of the superpage.

IMHO, a sane operating system will want to map the ACPI blob read-only.

Regards,
Julien Grall July 25, 2016, 9:46 a.m. UTC | #14
Hi George,

On 25/07/16 09:38, George Dunlap wrote:
> On Thu, Jul 21, 2016 at 10:15 PM, Stefano Stabellini
> <sstabellini@kernel.org> wrote:
>>> You are assuming that the guest will map the ACPI blob with the same
>>> attributes as the rest of the superpage.
>>>
>>> IHMO, a sane operating system will want to map the ACPI blob read-only.
>>
>> That's true. But there are other things which might be mapped
>> differently and could shatter a stage-1 superpage mapping (especially on
>> x86 that has a much more complex memory map than ARM). Obviously adding
>> one more is not doing it any good, but it might not make a difference in
>> practice.
>>
>> Anyway, I agree with Julien that his suggestion is the best for ARM. If
>> the libxl maintainers are willing to accept two different code paths for
>> this on ARM and x86, then I am fine with it too.
>
> Sorry to be a bit late to this thread -- there's a interface principle
> that I think we should at some point have a larger discussion about:
> whether "maxmem" means the amount of RAM which the guest sees as RAM,
> or whether "maxmem" means the amount of RAM that the administrator
> sees as used by the guest.  At the moment tnhere's no consistent
> answer actually; but I am strongly of the opinion that for usability
> the best answer is for "memory" to be the *total* amount of *host*
> memory used by the guest.  In an ideal world, the admin should be able
> to do "xl info", see that there is 3000MiB free, and then start a
> guest with 3000MiB and expect it to succeed.  At the moment he has to
> guess.

To confirm, do you include memory allocated by the hypervisor to keep 
track of the guest (i.e. struct domain, struct vcpu, ...)?

If not, the problem stays the same because the admin will have to know 
how much memory Xen will allocate to keep track of the guest. So if "xl 
info" tells you that 3000MiB are free, you will only be able to use 
3000MiB minus a few kilobytes.

If yes, this would be a problem for migration because a newer version of 
Xen may allocate less/more memory.

Furthermore, with this suggestion, we will likely not be able to take 
advantage of 1GB superpages unless the admin knows how much memory will 
be used by Xen outside of the guest RAM (such as the console page, the 
xenstore page, ...).

IMHO, it would be easier if we define "maxmem" as the memory usable as 
guest RAM and then let the toolstack add the extra memory (ACPI blob, 
special pages, ...).

Regards,
Julien Grall July 28, 2016, 11:06 a.m. UTC | #15
Hi,

On 26/07/16 02:17, Boris Ostrovsky wrote:
> On 07/25/2016 07:40 PM, Stefano Stabellini wrote:
>> On Mon, 25 Jul 2016, Boris Ostrovsky wrote:
>>> On 07/25/2016 06:06 PM, Stefano Stabellini wrote:
>>>> On Mon, 25 Jul 2016, George Dunlap wrote:
>>>>> On Thu, Jul 21, 2016 at 10:15 PM, Stefano Stabellini
>>>>> <sstabellini@kernel.org> wrote:
>>>> Going back to the discussion about how to account for the ACPI blob in
>>>> maxmem, let's make this simple, if we increase maxmem by the size of
>>>> the
>>>> ACPI blob:
>>>>
>>>> - the toolstack allocates more RAM than expected (bad)
>>>> - when the admin specifies 1GB of RAM, the guest actually gets 1GB of
>>>>    usable RAM (good)
>>>> - things are faster as Xen and the guest can exploit superpage mappings
>>>>    more easily at stage-1 and stage-2 (good)
>>>>
>>>> Let's call this option A.
>>>>
>>>> If we do not increase maxmem:
>>>>
>>>> - the toolstack allocates less RAM, closer to the size specified in the
>>>>    VM config file (good)
>>>> - the guest gets less usable memory than expected, less than what was
>>>>    specified in the VM config file (bad)
>>>
>>> Not sure I agree with this, at least for x86/Linux: guest gets 1GB of
>>> usable RAM and part of that RAM stores ACPI stuff. Guest is free to
>>> stash ACPI tables somewhere else or ignore them altogether and use that
>>> memory for whatever it wants.
>> On ARM it will be a ROM (from guest POV)
>
>
> In which case I don't see why we should take it from maxmem allocation.
> I somehow thought that there was a choice of whether to put it in ROM or
> RAM on ARM but if it's ROM only then I don't think there is an option.

We have the option to do both on ARM. I just feel that the ROM option is 
a cleaner interface because the ACPI tables are not supposed to be 
modified by the guest, so we can prevent them from being overridden 
(+ all the advantages mentioned by Stefano with option A).

> IIUIC the toolstack pretends that the blob goes to memory because that's
> how its interfaces work but that space is not really what we think about
> when we set memory/maxmem in the configuration file. Unlike x86.

I think we need to draw a conclusion so that Shannon can continue the 
work, and I would like to see this series in Xen 4.8. From my 
understanding you are for option B, as is George.

Stefano votes for option A, but finds B acceptable. Any other opinions?

Regards,
Julien Grall July 28, 2016, 11:10 a.m. UTC | #16
Hi Shannon,

Sorry for the late answer.

On 25/07/16 08:56, Shannon Zhao wrote:
>
>
> On 2016/7/20 17:32, Wei Liu wrote:
>> On Wed, Jul 20, 2016 at 02:52:05PM +0800, Shannon Zhao wrote:
>>>>
>>>>
>>>> On 2016/7/19 18:38, Wei Liu wrote:
>>>>>> On Fri, Jul 15, 2016 at 05:39:32PM +0800, Shannon Zhao wrote:
>>>>>> [...]
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> It would be trivial to have another option in xl.cfg to allow MB
>>>>>>>>>>>>>> granularity. But I don't think that's a good idea. Asking for more
>>>>>>>>>>>>>> memory when you don't really know how much is enough is not very useful.
>>>>>>>>>>>>>> If an admin can know how much is needed, surely the library can be
>>>>>>>>>>>>>> taught to obtain that knowledge, too.
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> We need to decide which model we should go with. And, if we decide to
>>>>>>>>>>>>>> diverge, document the difference between x86 and ARM model.
>>>>>>>>>>>>>>
>>>>>>>>>> Hi Wei,
>>>>>>>>>>
>>>>>>>>>> Do you decide how to add the size of ACPI blob to max_memkb?
>>>>>>>>>>
>>>>>> AFAICT ARM and x86 maintainers hold different opinions on how memory
>>>>>> should be accounted.
>>>>>>
>>>>>> I would like to have a unified memory accounting model. But if we can't
>>>>>> have that at the moment, I'm fine with divergence, but please document
>>>>>> it somewhere (comment near code snippet, in header, or a file under docs
>>>>>> etc). And the amount added to max_memkb needs to be properly calculated,
>>>>>> not some magic number, so that we have a chance in the future to
>>>>>> confidently change how we do thing.
>>>> If it's only allowed to add the size to max_memkb in
>>>> libxl__domain_build_info_setdefault(), it only can use a const number
>>>> since the tables are not generted and we don;t know the size. But the
>>>> const number could be chosen properly since we could know the maximum
>>>> ACPI tables size of current ARM approach.
>>>>
>>>> But maybe in the future, if we add some new ACPI tables, it should
>>>> increase the size as well. So I think this should be documented.
>>>>
>>>> As I asked before, is it ok to add the size to max_memkb after
>>>> generating the ACPI tables and before loading the tables to guest space?
>>>>
>> Yes. I don't think shoehorning everything into setdefault is a good
>> idea.
>>
>> I think libxl_arm.c:libxl__arch_domain_create would be the right place
>> to do it.
>>
>> I am thinking about calling xc_domain_setmaxmem there, but not adding a
>> number to d_config->b_info.max_memkb. Adding that to ->max_memkb would
>> be wrong because the bigger ->max_memkb will be recored and the same
>> algorithm will be applied every time you migrate your guest, so the
>> max_memkb will grow bigger and bigger.
>>
>> Given the different approach taken by ARM and x86, maybe we need to also
>> record the size of acpi blobs somewhere in xenstore (also needs to be
>> documented) so that subsequent libxl_domain_setmaxmem can extract that
>> number again.
>>
>> Please wait a bit for Stefano and Julien to comment before you do work.
>>
> Stefano, Julien,
> Any comments regarding how to add the ACPI size to max_memkb?

I think Wei's suggestion is a good one.
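
Very roughly, it could look like the sketch below in libxl_arm.c. This is 
only a sketch: the exact prototypes, the interaction with the slack libxl 
already applies, and the libxl__get_acpi_size() helper are all assumptions 
here, not the actual implementation.

int libxl__arch_domain_create(libxl__gc *gc, libxl_domain_config *d_config,
                              uint32_t domid)
{
    libxl_ctx *ctx = libxl__gc_owner(gc);
    uint64_t acpi_blob_size, acpi_pages, acpi_kb;

    /* Hypothetical helper: returns the size in bytes of the ACPI blob
     * generated for this domain (the tables are assumed to have been
     * generated by this point). */
    acpi_blob_size = libxl__get_acpi_size(gc, d_config);

    /* Round the blob up to page granularity and convert to kilobytes. */
    acpi_pages = (acpi_blob_size + XC_PAGE_SIZE - 1) >> XC_PAGE_SHIFT;
    acpi_kb    = acpi_pages << (XC_PAGE_SHIFT - 10);

    /* Raise the hypervisor-side limit only; d_config->b_info.max_memkb is
     * left untouched so the recorded configuration (and hence migration)
     * does not grow every time this runs. */
    if (xc_domain_setmaxmem(ctx->xch, domid,
                            d_config->b_info.max_memkb +
                            LIBXL_MAXMEM_CONSTANT + acpi_kb) < 0)
        return ERROR_FAIL;

    /* ... rest of the existing libxl__arch_domain_create work ... */

    return 0;
}

The blob size would then also need to be recorded somewhere (e.g. in 
xenstore, as Wei suggests above) so that the same limit can be recomputed 
on migration.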

Cheers,
Shannon Zhao July 28, 2016, 12:42 p.m. UTC | #17
On 2016/07/28 19:06, Julien Grall wrote:
> On 26/07/16 02:17, Boris Ostrovsky wrote:
>> On 07/25/2016 07:40 PM, Stefano Stabellini wrote:
>>> On Mon, 25 Jul 2016, Boris Ostrovsky wrote:
>>>> On 07/25/2016 06:06 PM, Stefano Stabellini wrote:
>>>>> On Mon, 25 Jul 2016, George Dunlap wrote:
>>>>>> On Thu, Jul 21, 2016 at 10:15 PM, Stefano Stabellini
>>>>>> <sstabellini@kernel.org> wrote:
>>>>> Going back to the discussion about how to account for the ACPI blob in
>>>>> maxmem, let's make this simple, if we increase maxmem by the size of
>>>>> the
>>>>> ACPI blob:
>>>>>
>>>>> - the toolstack allocates more RAM than expected (bad)
>>>>> - when the admin specifies 1GB of RAM, the guest actually gets 1GB of
>>>>>    usable RAM (good)
>>>>> - things are faster as Xen and the guest can exploit superpage
>>>>> mappings
>>>>>    more easily at stage-1 and stage-2 (good)
>>>>>
>>>>> Let's call this option A.
>>>>>
>>>>> If we do not increase maxmem:
>>>>>
>>>>> - the toolstack allocates less RAM, closer to the size specified in
>>>>> the
>>>>>    VM config file (good)
>>>>> - the guest gets less usable memory than expected, less than what was
>>>>>    specified in the VM config file (bad)
>>>>
>>>> Not sure I agree with this, at least for x86/Linux: guest gets 1GB of
>>>> usable RAM and part of that RAM stores ACPI stuff. Guest is free to
>>>> stash ACPI tables somewhere else or ignore them altogether and use that
>>>> memory for whatever it wants.
>>> On ARM it will be a ROM (from guest POV)
>>
>>
>> In which case I don't see why we should take it from maxmem allocation.
>> I somehow thought that there was a choice of whether to put it in ROM or
>> RAM on ARM but if it's ROM only then I don't think there is an option.
> 
> We have option to do the both on ARM. I just feel that the ROM option is
> a cleaner interface because the ACPI tables are not supposed be modified
> by the guest, so we can prevent to be overridden (+ all the advantages
> mentioned by Stefano with option A).
> 
>> IIUIC the toolstack pretends that the blob goes to memory because that's
>> how its interfaces work but that space is not really what we think about
>> when we set memory/maxmem in the configuration file. Unlike x86.
> 
> I think we need to draw a conclusion for Shannon to continue to do the
> work and I would like to see this series in Xen 4.8. From my
> understanding you are for option B, so does George.
> 
> Stefano votes for option A, but find B acceptable. Any other opinions?
I agree with Stefano, both are fine.

Thanks,
Julien Grall Aug. 3, 2016, 7:20 p.m. UTC | #18
Hi Wei,

On 02/08/16 12:01, Wei Liu wrote:
> On Thu, Jul 28, 2016 at 08:42:05PM +0800, Shannon Zhao wrote:
>> On 2016/07/28 19:06, Julien Grall wrote:
>>> On 26/07/16 02:17, Boris Ostrovsky wrote:
>>>> On 07/25/2016 07:40 PM, Stefano Stabellini wrote:
>>>>> On Mon, 25 Jul 2016, Boris Ostrovsky wrote:
>>>>>> On 07/25/2016 06:06 PM, Stefano Stabellini wrote:
>>>>>>> On Mon, 25 Jul 2016, George Dunlap wrote:
>>>>>>>> On Thu, Jul 21, 2016 at 10:15 PM, Stefano Stabellini
>>>>>>>> <sstabellini@kernel.org> wrote:
>>>>>>> Going back to the discussion about how to account for the ACPI blob in
>>>>>>> maxmem, let's make this simple, if we increase maxmem by the size of
>>>>>>> the
>>>>>>> ACPI blob:
>>>>>>>
>>>>>>> - the toolstack allocates more RAM than expected (bad)
>>>>>>> - when the admin specifies 1GB of RAM, the guest actually gets 1GB of
>>>>>>>    usable RAM (good)
>>>>>>> - things are faster as Xen and the guest can exploit superpage
>>>>>>> mappings
>>>>>>>    more easily at stage-1 and stage-2 (good)
>>>>>>>
>>>>>>> Let's call this option A.
>>>>>>>
>>>>>>> If we do not increase maxmem:
>>>>>>>
>>>>>>> - the toolstack allocates less RAM, closer to the size specified in
>>>>>>> the
>>>>>>>    VM config file (good)
>>>>>>> - the guest gets less usable memory than expected, less than what was
>>>>>>>    specified in the VM config file (bad)
>>>>>>
>>>>>> Not sure I agree with this, at least for x86/Linux: guest gets 1GB of
>>>>>> usable RAM and part of that RAM stores ACPI stuff. Guest is free to
>>>>>> stash ACPI tables somewhere else or ignore them altogether and use that
>>>>>> memory for whatever it wants.
>>>>> On ARM it will be a ROM (from guest POV)
>>>>
>>>>
>>>> In which case I don't see why we should take it from maxmem allocation.
>>>> I somehow thought that there was a choice of whether to put it in ROM or
>>>> RAM on ARM but if it's ROM only then I don't think there is an option.
>>>
>>> We have option to do the both on ARM. I just feel that the ROM option is
>>> a cleaner interface because the ACPI tables are not supposed be modified
>>> by the guest, so we can prevent to be overridden (+ all the advantages
>>> mentioned by Stefano with option A).
>>>
>>>> IIUIC the toolstack pretends that the blob goes to memory because that's
>>>> how its interfaces work but that space is not really what we think about
>>>> when we set memory/maxmem in the configuration file. Unlike x86.
>>>
>>> I think we need to draw a conclusion for Shannon to continue to do the
>>> work and I would like to see this series in Xen 4.8. From my
>>> understanding you are for option B, so does George.
>>>
>>> Stefano votes for option A, but find B acceptable. Any other opinions?
>> I agree with Stefano, both are fine.
>>
>
> Sorry for the late reply.
>
> Are you now unblocked? If not, what is not yet decided or needed
> clarification?

I don't think there was a strict consensus. I think this is something we 
can revisit later if necessary, as the guest interface is not tied to a 
specific physical address (the UEFI firmware should retrieve the 
information from the device tree).

So, Shannon could continue towards solution A, i.e. the ACPI blob is 
loaded outside of the guest RAM.

If someone disagrees, please speak up. But we should unblock Shannon to 
get this series into Xen 4.8.

Regards,

Patch

diff --git a/tools/libxc/xc_dom_arm.c b/tools/libxc/xc_dom_arm.c
index 64a8b67..6a0a5b7 100644
--- a/tools/libxc/xc_dom_arm.c
+++ b/tools/libxc/xc_dom_arm.c
@@ -63,6 +63,47 @@  static int setup_pgtables_arm(struct xc_dom_image *dom)
 
 /* ------------------------------------------------------------------------ */
 
+static int xc_dom_copy_acpi(struct xc_dom_image *dom)
+{
+    int rc, i;
+    uint32_t pages_num = ROUNDUP(dom->acpitable_size, XC_PAGE_SHIFT) >>
+                         XC_PAGE_SHIFT;
+    const xen_pfn_t base = GUEST_ACPI_BASE >> XC_PAGE_SHIFT;
+    xen_pfn_t *p2m;
+    void *acpi_pages;
+
+    p2m = malloc(pages_num * sizeof(*p2m));
+    for (i = 0; i < pages_num; i++)
+        p2m[i] = base + i;
+
+    rc = xc_domain_populate_physmap_exact(dom->xch, dom->guest_domid,
+                                          pages_num, 0, 0, p2m);
+    if ( rc )
+    {
+        DOMPRINTF("%s: xc_domain_populate_physmap_exact failed with %d",
+                  __FUNCTION__, rc);
+        goto out;
+    }
+
+    acpi_pages = xc_map_foreign_range(dom->xch, dom->guest_domid,
+                                      PAGE_SIZE * pages_num,
+                                      PROT_READ | PROT_WRITE, base);
+    if ( !acpi_pages )
+    {
+        DOMPRINTF("%s Can't map acpi_pages", __FUNCTION__);
+        rc = -1;
+        goto out;
+    }
+
+    memcpy(acpi_pages, dom->acpitable_blob, dom->acpitable_size);
+
+out:
+    munmap(acpi_pages, pages_num * PAGE_SIZE);
+    free(p2m);
+
+    return rc;
+}
+
 static int alloc_magic_pages(struct xc_dom_image *dom)
 {
     int rc, i;
@@ -100,6 +141,16 @@  static int alloc_magic_pages(struct xc_dom_image *dom)
     xc_hvm_param_set(dom->xch, dom->guest_domid, HVM_PARAM_STORE_EVTCHN,
             dom->xenstore_evtchn);
 
+    if ( dom->acpitable_blob && dom->acpitable_size > 0 )
+    {
+        rc = xc_dom_copy_acpi(dom);
+        if ( rc != 0 )
+        {
+            DOMPRINTF("Unable to copy ACPI tables");
+            return rc;
+        }
+    }
+
     return 0;
 }