diff mbox series

[RFC] hw/virtio/vhost: re-factor vhost-section and allow DIRTY_MEMORY_CODE

Message ID 20200604111323.7458-1-alex.bennee@linaro.org
State New
Headers show
Series [RFC] hw/virtio/vhost: re-factor vhost-section and allow DIRTY_MEMORY_CODE | expand

Commit Message

Alex Bennée June 4, 2020, 11:13 a.m. UTC
The purpose of vhost_section is to identify RAM regions that need to
be made available to a vhost client. However when running under TCG
all RAM sections have DIRTY_MEMORY_CODE set which leads to problems
down the line. The original comment implies VGA regions are a problem
but doesn't explain why vhost has a problem with it.

Re-factor the code so:

  - steps are clearer to follow
  - reason for rejection is recorded in the trace point
  - we allow DIRTY_MEMORY_CODE when TCG is enabled

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>
Cc: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------
 1 file changed, 32 insertions(+), 14 deletions(-)

-- 
2.20.1

Comments

Michael S. Tsirkin June 4, 2020, 11:24 a.m. UTC | #1
On Thu, Jun 04, 2020 at 12:13:23PM +0100, Alex Bennée wrote:
> The purpose of vhost_section is to identify RAM regions that need to

> be made available to a vhost client. However when running under TCG

> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

> down the line. The original comment implies VGA regions are a problem

> but doesn't explain why vhost has a problem with it.

> 

> Re-factor the code so:

> 

>   - steps are clearer to follow

>   - reason for rejection is recorded in the trace point

>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

> 

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Cc: Michael S. Tsirkin <mst@redhat.com>

> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

> Cc: Stefan Hajnoczi <stefanha@redhat.com>

> ---

>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

>  1 file changed, 32 insertions(+), 14 deletions(-)

> 

> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c

> index aff98a0ede5..f81fc87e74c 100644

> --- a/hw/virtio/vhost.c

> +++ b/hw/virtio/vhost.c

> @@ -27,6 +27,7 @@

>  #include "migration/blocker.h"

>  #include "migration/qemu-file-types.h"

>  #include "sysemu/dma.h"

> +#include "sysemu/tcg.h"

>  #include "trace.h"

>  

>  /* enabled until disconnected backend stabilizes */

> @@ -403,26 +404,43 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,

>      return r;

>  }

>  

> +/*

> + * vhost_section: identify sections needed for vhost access

> + *

> + * We only care about RAM sections here (where virtqueue can live). If

> + * we find one we still allow the backend to potentially filter it out

> + * of our list.

> + */

>  static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)

>  {

> -    bool result;

> -    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &

> -                     ~(1 << DIRTY_MEMORY_MIGRATION);

> -    result = memory_region_is_ram(section->mr) &&

> -        !memory_region_is_rom(section->mr);

> -

> -    /* Vhost doesn't handle any block which is doing dirty-tracking other

> -     * than migration; this typically fires on VGA areas.

> -     */

> -    result &= !log_dirty;

> +    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;


I'm not sure what this enum buys us as compared to bool.
Also why force OK to 0?
And I prefer an explicit "else result = NOT_RAM" below
instead of initializing it here.

> +

> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

> +        uint8_t handled_dirty;

>  

> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

> -        result &=

> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

> +        /*

> +         * Vhost doesn't handle any block which is doing dirty-tracking other

> +         * than migration; this typically fires on VGA areas. However

> +         * for TCG we also do dirty code page tracking which shouldn't

> +         * get in the way.

> +         */

> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

> +        if (tcg_enabled()) {

> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

> +        }


So DIRTY_MEMORY_CODE is only set by TCG right? Thus I'm guessing
we can just allow this unconditionally.


> +        if (dirty_mask & ~handled_dirty) {

> +            result = DIRTY;

> +        } else if (dev->vhost_ops->vhost_backend_mem_section_filter &&

> +            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {

> +            result = FILTERED;

> +        } else {

> +            result = OK;

> +        }

>      }

>  

>      trace_vhost_section(section->mr->name, result);

> -    return result;

> +    return result == OK;

>  }

>  

>  static void vhost_begin(MemoryListener *listener)

> -- 

> 2.20.1
Alex Bennée June 4, 2020, 11:49 a.m. UTC | #2
Michael S. Tsirkin <mst@redhat.com> writes:

> On Thu, Jun 04, 2020 at 12:13:23PM +0100, Alex Bennée wrote:

>> The purpose of vhost_section is to identify RAM regions that need to

>> be made available to a vhost client. However when running under TCG

>> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

>> down the line. The original comment implies VGA regions are a problem

>> but doesn't explain why vhost has a problem with it.

>> 

>> Re-factor the code so:

>> 

>>   - steps are clearer to follow

>>   - reason for rejection is recorded in the trace point

>>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

>> 

>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>> Cc: Michael S. Tsirkin <mst@redhat.com>

>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

>> Cc: Stefan Hajnoczi <stefanha@redhat.com>

>> ---

>>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

>>  1 file changed, 32 insertions(+), 14 deletions(-)

>> 

>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c

>> index aff98a0ede5..f81fc87e74c 100644

>> --- a/hw/virtio/vhost.c

>> +++ b/hw/virtio/vhost.c

>> @@ -27,6 +27,7 @@

>>  #include "migration/blocker.h"

>>  #include "migration/qemu-file-types.h"

>>  #include "sysemu/dma.h"

>> +#include "sysemu/tcg.h"

>>  #include "trace.h"

>>  

>>  /* enabled until disconnected backend stabilizes */

>> @@ -403,26 +404,43 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,

>>      return r;

>>  }

>>  

>> +/*

>> + * vhost_section: identify sections needed for vhost access

>> + *

>> + * We only care about RAM sections here (where virtqueue can live). If

>> + * we find one we still allow the backend to potentially filter it out

>> + * of our list.

>> + */

>>  static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)

>>  {

>> -    bool result;

>> -    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &

>> -                     ~(1 << DIRTY_MEMORY_MIGRATION);

>> -    result = memory_region_is_ram(section->mr) &&

>> -        !memory_region_is_rom(section->mr);

>> -

>> -    /* Vhost doesn't handle any block which is doing dirty-tracking other

>> -     * than migration; this typically fires on VGA areas.

>> -     */

>> -    result &= !log_dirty;

>> +    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;

>

> I'm not sure what does this enum buy us as compared to bool.


The only real point of the enum is to give a little more detailed
information to the trace point to expose why a section wasn't included.
In a previous iteration I just had the tracepoint at the bottom before a
return true where all other legs had returned false. We could switch to
just having the tracepoint hit for explicit inclusions?

> Also why force OK to 0?


Personal preference where 0 indicates success and !0 indicates failure
of various kinds. Again we can drop if we don't want the information in
the tracepoint.

> And I prefer an explicit "else result = NOT_RAM" below

> instead of initializing it here.


Ok.

>

>> +

>> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

>> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

>> +        uint8_t handled_dirty;

>>  

>> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

>> -        result &=

>> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

>> +        /*

>> +         * Vhost doesn't handle any block which is doing dirty-tracking other

>> +         * than migration; this typically fires on VGA areas. However

>> +         * for TCG we also do dirty code page tracking which shouldn't

>> +         * get in the way.

>> +         */

>> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

>> +        if (tcg_enabled()) {

>> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

>> +        }

>

> So DIRTY_MEMORY_CODE is only set by TCG right? Thus I'm guessing

> we can just allow this unconditionally.


Which actually makes the test:

  if (dirty_mask & DIRTY_MEMORY_VGA) {
     .. fail ..
  }

which is more in line with the comment although wouldn't fail if we
added additional DIRTY_MEMORY flags. This leads to the question what
exactly is it about DIRTY tracking that vhost doesn't like. Is it really
only avoiding having virtqueue in video RAM? Does this ever actually
happen?

I assume boards with unified memory models where video ram is shared
with system ram just end up partitioning the memory regions?

-- 
Alex Bennée
Michael S. Tsirkin June 4, 2020, 11:55 a.m. UTC | #3
On Thu, Jun 04, 2020 at 12:49:17PM +0100, Alex Bennée wrote:
> 

> Michael S. Tsirkin <mst@redhat.com> writes:

> 

> > On Thu, Jun 04, 2020 at 12:13:23PM +0100, Alex Bennée wrote:

> >> The purpose of vhost_section is to identify RAM regions that need to

> >> be made available to a vhost client. However when running under TCG

> >> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

> >> down the line. The original comment implies VGA regions are a problem

> >> but doesn't explain why vhost has a problem with it.

> >> 

> >> Re-factor the code so:

> >> 

> >>   - steps are clearer to follow

> >>   - reason for rejection is recorded in the trace point

> >>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

> >> 

> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> >> Cc: Michael S. Tsirkin <mst@redhat.com>

> >> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

> >> Cc: Stefan Hajnoczi <stefanha@redhat.com>

> >> ---

> >>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

> >>  1 file changed, 32 insertions(+), 14 deletions(-)

> >> 

> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c

> >> index aff98a0ede5..f81fc87e74c 100644

> >> --- a/hw/virtio/vhost.c

> >> +++ b/hw/virtio/vhost.c

> >> @@ -27,6 +27,7 @@

> >>  #include "migration/blocker.h"

> >>  #include "migration/qemu-file-types.h"

> >>  #include "sysemu/dma.h"

> >> +#include "sysemu/tcg.h"

> >>  #include "trace.h"

> >>  

> >>  /* enabled until disconnected backend stabilizes */

> >> @@ -403,26 +404,43 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,

> >>      return r;

> >>  }

> >>  

> >> +/*

> >> + * vhost_section: identify sections needed for vhost access

> >> + *

> >> + * We only care about RAM sections here (where virtqueue can live). If

> >> + * we find one we still allow the backend to potentially filter it out

> >> + * of our list.

> >> + */

> >>  static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)

> >>  {

> >> -    bool result;

> >> -    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &

> >> -                     ~(1 << DIRTY_MEMORY_MIGRATION);

> >> -    result = memory_region_is_ram(section->mr) &&

> >> -        !memory_region_is_rom(section->mr);

> >> -

> >> -    /* Vhost doesn't handle any block which is doing dirty-tracking other

> >> -     * than migration; this typically fires on VGA areas.

> >> -     */

> >> -    result &= !log_dirty;

> >> +    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;

> >

> > I'm not sure what does this enum buy us as compared to bool.

> 

> The only real point of the enum is to give a little more detailed

> information to the trace point to expose why a section wasn't included.

> In a previous iteration I just had the tracepoint at the bottom before a

> return true where all other legs had returned false. We could switch to

> just having the tracepoint hit for explicit inclusions?


I didn't notice.  Yes, ok more tracepoints IMHO.

> > Also why force OK to 0?

> 

> Personal preference where 0 indicates success and !0 indicates failure

> of various kinds. Again we can drop if we don't want the information in

> the tracepoint.


So in that case we need to set all values so people can decode them
from the trace. But I think it's best to just have more trace points
or drop it from the trace.

> > And I prefer an explicit "else result = NOT_RAM" below

> > instead of initializing it here.

> 

> Ok.

> 

> >

> >> +

> >> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

> >> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

> >> +        uint8_t handled_dirty;

> >>  

> >> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

> >> -        result &=

> >> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

> >> +        /*

> >> +         * Vhost doesn't handle any block which is doing dirty-tracking other

> >> +         * than migration; this typically fires on VGA areas. However

> >> +         * for TCG we also do dirty code page tracking which shouldn't

> >> +         * get in the way.

> >> +         */

> >> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

> >> +        if (tcg_enabled()) {

> >> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

> >> +        }

> >

> > So DIRTY_MEMORY_CODE is only set by TCG right? Thus I'm guessing

> > we can just allow this unconditionally.

> 

> Which actually makes the test:

> 

>   if (dirty_mask & DIRTY_MEMORY_VGA) {

>      .. fail ..

>   }

> 

> which is more in line with the comment although wouldn't fail if we

> added additional DIRTY_MEMORY flags. This leads to the question what

> exactly is it about DIRTY tracking that vhost doesn't like.


vhost does not know how to track writes to specific regions. It can either
track all writes to memory (which slows it down quite a bit)
or no writes. It never actually *needs* to write to VGA,
so we do a hack and just skip these and then if that's the
only thing we need to track then we don't need to enable
its dirty tracking.

I don't really know what DIRTY_MEMORY_CODE is or when it's set.


> Is it really

> only avoiding having virtqueue in video RAM? Does this ever actually

> happen?


No to both.

> I assume boards with unified memory models where video ram is shared

> with system ram just end up partitioning the memory regions?


That's the assumption I think.

> 

> -- 

> Alex Bennée
Alex Bennée June 4, 2020, 12:39 p.m. UTC | #4
Michael S. Tsirkin <mst@redhat.com> writes:

> On Thu, Jun 04, 2020 at 12:49:17PM +0100, Alex Bennée wrote:

>> 

>> Michael S. Tsirkin <mst@redhat.com> writes:

>> 

>> > On Thu, Jun 04, 2020 at 12:13:23PM +0100, Alex Bennée wrote:

>> >> The purpose of vhost_section is to identify RAM regions that need to

>> >> be made available to a vhost client. However when running under TCG

>> >> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

>> >> down the line. The original comment implies VGA regions are a problem

>> >> but doesn't explain why vhost has a problem with it.

>> >> 

>> >> Re-factor the code so:

>> >> 

>> >>   - steps are clearer to follow

>> >>   - reason for rejection is recorded in the trace point

>> >>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

>> >> 

>> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>> >> Cc: Michael S. Tsirkin <mst@redhat.com>

>> >> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

>> >> Cc: Stefan Hajnoczi <stefanha@redhat.com>

>> >> ---

>> >>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

>> >>  1 file changed, 32 insertions(+), 14 deletions(-)

>> >> 

>> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c

>> >> index aff98a0ede5..f81fc87e74c 100644

>> >> --- a/hw/virtio/vhost.c

>> >> +++ b/hw/virtio/vhost.c

>> >> @@ -27,6 +27,7 @@

>> >>  #include "migration/blocker.h"

>> >>  #include "migration/qemu-file-types.h"

>> >>  #include "sysemu/dma.h"

>> >> +#include "sysemu/tcg.h"

>> >>  #include "trace.h"

>> >>  

>> >>  /* enabled until disconnected backend stabilizes */

>> >> @@ -403,26 +404,43 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,

>> >>      return r;

>> >>  }

>> >>  

>> >> +/*

>> >> + * vhost_section: identify sections needed for vhost access

>> >> + *

>> >> + * We only care about RAM sections here (where virtqueue can live). If

>> >> + * we find one we still allow the backend to potentially filter it out

>> >> + * of our list.

>> >> + */

>> >>  static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)

>> >>  {

>> >> -    bool result;

>> >> -    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &

>> >> -                     ~(1 << DIRTY_MEMORY_MIGRATION);

>> >> -    result = memory_region_is_ram(section->mr) &&

>> >> -        !memory_region_is_rom(section->mr);

>> >> -

>> >> -    /* Vhost doesn't handle any block which is doing dirty-tracking other

>> >> -     * than migration; this typically fires on VGA areas.

>> >> -     */

>> >> -    result &= !log_dirty;

>> >> +    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;

>> >

>> > I'm not sure what does this enum buy us as compared to bool.

>> 

>> The only real point of the enum is to give a little more detailed

>> information to the trace point to expose why a section wasn't included.

>> In a previous iteration I just had the tracepoint at the bottom before a

>> return true where all other legs had returned false. We could switch to

>> just having the tracepoint hit for explicit inclusions?

>

> I didn't notice.  Yes, ok more tracepoints IMHO.


I can simplify to two:

  trace_vhost_section(mr->name)
  trace_vhost_reject_section(mr->name, int reason)

Not sure if it's worth defining an enum outside just for the purposes of
the trace though. Do we have the concept of per-trace event enum codes?

>> > Also why force OK to 0?

>> 

>> Personal preference where 0 indicates success and !0 indicates failure

>> of various kinds. Again we can drop if we don't want the information in

>> the tracepoint.

>

> So in that case we need to set all values so people can decode them

> from the trace. But I think it's best to just have more trace points

> or drop it from the trace.

>

>> > And I prefer an explicit "else result = NOT_RAM" below

>> > instead of initializing it here.

>> 

>> Ok.

>> 

>> >

>> >> +

>> >> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

>> >> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

>> >> +        uint8_t handled_dirty;

>> >>  

>> >> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

>> >> -        result &=

>> >> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

>> >> +        /*

>> >> +         * Vhost doesn't handle any block which is doing dirty-tracking other

>> >> +         * than migration; this typically fires on VGA areas. However

>> >> +         * for TCG we also do dirty code page tracking which shouldn't

>> >> +         * get in the way.

>> >> +         */

>> >> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

>> >> +        if (tcg_enabled()) {

>> >> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

>> >> +        }

>> >

>> > So DIRTY_MEMORY_CODE is only set by TCG right? Thus I'm guessing

>> > we can just allow this unconditionally.

>> 

>> Which actually makes the test:

>> 

>>   if (dirty_mask & DIRTY_MEMORY_VGA) {

>>      .. fail ..

>>   }

>> 

>> which is more in line with the comment although wouldn't fail if we

>> added additional DIRTY_MEMORY flags. This leads to the question what

>> exactly is it about DIRTY tracking that vhost doesn't like.

>

> vhost does not know how to track writes to specific regions. It can either

> track all writes to memory (which slows it down quite a bit)

> or no writes.


So can vhost interfere with dirty tracking itself in the kernel by
trapping the writes? I guess there is no way this can happen with
vhost-user?

(I wonder what would happen if a vhost-user daemon did an mprotect() on
RAM from its shared view?)

> It never actually *needs* to write to VGA,

> so we do a hack and just skip these and then if that's the

> only thing we need to track then we don't need to enable

> its dirty tracking.

>

> I don't really know what is DIRTY_MEMORY_CODE and when it's set.


We use it in softmmu so that any pages that have code in them always force
the slow-path into cputlb for writes to those pages. This allows us to
detect self-modifying code. The kernel would never get involved but I
don't think vhost and TCG are compatible anyway. I'm only really
interested in vhost-user and its interaction with TCG.

I'll spin a v2 now.

-- 
Alex Bennée
Philippe Mathieu-Daudé June 4, 2020, 12:58 p.m. UTC | #5
On 6/4/20 1:49 PM, Alex Bennée wrote:
> 

> Michael S. Tsirkin <mst@redhat.com> writes:

> 

>> On Thu, Jun 04, 2020 at 12:13:23PM +0100, Alex Bennée wrote:

>>> The purpose of vhost_section is to identify RAM regions that need to

>>> be made available to a vhost client. However when running under TCG

>>> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

>>> down the line. The original comment implies VGA regions are a problem

>>> but doesn't explain why vhost has a problem with it.

>>>

>>> Re-factor the code so:

>>>

>>>   - steps are clearer to follow

>>>   - reason for rejection is recorded in the trace point

>>>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

>>>

>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>>> Cc: Michael S. Tsirkin <mst@redhat.com>

>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

>>> Cc: Stefan Hajnoczi <stefanha@redhat.com>

>>> ---

>>>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

>>>  1 file changed, 32 insertions(+), 14 deletions(-)

[...]
>>> +

>>> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

>>> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

>>> +        uint8_t handled_dirty;

>>>  

>>> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

>>> -        result &=

>>> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

>>> +        /*

>>> +         * Vhost doesn't handle any block which is doing dirty-tracking other

>>> +         * than migration; this typically fires on VGA areas. However

>>> +         * for TCG we also do dirty code page tracking which shouldn't

>>> +         * get in the way.

>>> +         */

>>> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

>>> +        if (tcg_enabled()) {

>>> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

>>> +        }

>>

>> So DIRTY_MEMORY_CODE is only set by TCG right? Thus I'm guessing

>> we can just allow this unconditionally.

> 

> Which actually makes the test:

> 

>   if (dirty_mask & DIRTY_MEMORY_VGA) {


Eh? Shouldn't this be "if (dirty_mask & (1 << DIRTY_MEMORY_VGA))"?

>      .. fail ..

>   }

> 

> which is more in line with the comment although wouldn't fail if we

> added additional DIRTY_MEMORY flags. This leads to the question what

> exactly is it about DIRTY tracking that vhost doesn't like. Is it really

> only avoiding having virtqueue in video RAM? Does this ever actually

> happen?

> 

> I assume boards with unified memory models where video ram is shared

> with system ram just end up partitioning the memory regions?

>
Dr. David Alan Gilbert June 4, 2020, 1:07 p.m. UTC | #6
* Alex Bennée (alex.bennee@linaro.org) wrote:
> 

> Michael S. Tsirkin <mst@redhat.com> writes:

> 

> > On Thu, Jun 04, 2020 at 12:49:17PM +0100, Alex Bennée wrote:

> >> 

> >> Michael S. Tsirkin <mst@redhat.com> writes:

> >> 

> >> > On Thu, Jun 04, 2020 at 12:13:23PM +0100, Alex Bennée wrote:

> >> >> The purpose of vhost_section is to identify RAM regions that need to

> >> >> be made available to a vhost client. However when running under TCG

> >> >> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

> >> >> down the line. The original comment implies VGA regions are a problem

> >> >> but doesn't explain why vhost has a problem with it.

> >> >> 

> >> >> Re-factor the code so:

> >> >> 

> >> >>   - steps are clearer to follow

> >> >>   - reason for rejection is recorded in the trace point

> >> >>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

> >> >> 

> >> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> >> >> Cc: Michael S. Tsirkin <mst@redhat.com>

> >> >> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

> >> >> Cc: Stefan Hajnoczi <stefanha@redhat.com>

> >> >> ---

> >> >>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

> >> >>  1 file changed, 32 insertions(+), 14 deletions(-)

> >> >> 

> >> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c

> >> >> index aff98a0ede5..f81fc87e74c 100644

> >> >> --- a/hw/virtio/vhost.c

> >> >> +++ b/hw/virtio/vhost.c

> >> >> @@ -27,6 +27,7 @@

> >> >>  #include "migration/blocker.h"

> >> >>  #include "migration/qemu-file-types.h"

> >> >>  #include "sysemu/dma.h"

> >> >> +#include "sysemu/tcg.h"

> >> >>  #include "trace.h"

> >> >>  

> >> >>  /* enabled until disconnected backend stabilizes */

> >> >> @@ -403,26 +404,43 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,

> >> >>      return r;

> >> >>  }

> >> >>  

> >> >> +/*

> >> >> + * vhost_section: identify sections needed for vhost access

> >> >> + *

> >> >> + * We only care about RAM sections here (where virtqueue can live). If

> >> >> + * we find one we still allow the backend to potentially filter it out

> >> >> + * of our list.

> >> >> + */

> >> >>  static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)

> >> >>  {

> >> >> -    bool result;

> >> >> -    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &

> >> >> -                     ~(1 << DIRTY_MEMORY_MIGRATION);

> >> >> -    result = memory_region_is_ram(section->mr) &&

> >> >> -        !memory_region_is_rom(section->mr);

> >> >> -

> >> >> -    /* Vhost doesn't handle any block which is doing dirty-tracking other

> >> >> -     * than migration; this typically fires on VGA areas.

> >> >> -     */

> >> >> -    result &= !log_dirty;

> >> >> +    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;

> >> >

> >> > I'm not sure what does this enum buy us as compared to bool.

> >> 

> >> The only real point of the enum is to give a little more detailed

> >> information to the trace point to expose why a section wasn't included.

> >> In a previous iteration I just had the tracepoint at the bottom before a

> >> return true where all other legs had returned false. We could switch to

> >> just having the tracepoint hit for explicit inclusions?

> >

> > I didn't notice.  Yes, ok more tracepoints IMHO.

> 

> I can simplify to two:

> 

>   trace_vhost_section(mr->name)

>   trace_vhost_reject_section(mr->name, int reason)

> 

> Not sure if it's worth defining a enum outside just for the purposes of

> the trace though. Do we have the concept of per-trace event enum codes?


If you want a 'reason' for the trace, then why not just make
  const char *result

Dave

> >> > Also why force OK to 0?

> >> 

> >> Personal preference where 0 indicates success and !0 indicates failure

> >> of various kinds. Again we can drop if we don't want the information in

> >> the tracepoint.

> >

> > So in that case we need to set all values so people can decode them

> > from the trace. But I think it's best to just have more trace points

> > or drop it from the trace.

> >

> >> > And I prefer an explicit "else result = NOT_RAM" below

> >> > instead of initializing it here.

> >> 

> >> Ok.

> >> 

> >> >

> >> >> +

> >> >> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

> >> >> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

> >> >> +        uint8_t handled_dirty;

> >> >>  

> >> >> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

> >> >> -        result &=

> >> >> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

> >> >> +        /*

> >> >> +         * Vhost doesn't handle any block which is doing dirty-tracking other

> >> >> +         * than migration; this typically fires on VGA areas. However

> >> >> +         * for TCG we also do dirty code page tracking which shouldn't

> >> >> +         * get in the way.

> >> >> +         */

> >> >> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

> >> >> +        if (tcg_enabled()) {

> >> >> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

> >> >> +        }

> >> >

> >> > So DIRTY_MEMORY_CODE is only set by TCG right? Thus I'm guessing

> >> > we can just allow this unconditionally.

> >> 

> >> Which actually makes the test:

> >> 

> >>   if (dirty_mask & DIRTY_MEMORY_VGA) {

> >>      .. fail ..

> >>   }

> >> 

> >> which is more in line with the comment although wouldn't fail if we

> >> added additional DIRTY_MEMORY flags. This leads to the question what

> >> exactly is it about DIRTY tracking that vhost doesn't like.

> >

> > vhost does not know how to track writes to specific regions. It can either

> > track all writes to memory (which slows it down quite a bit)

> > or no writes.

> 

> So can vhost interfere with dirty tracking itself in the kernel by

> trapping the writes? I guess there is no way this can happen with

> vhost-user?

> 

> (I wonder what would happen if a vhost-user daemon did an mprotect() on

> RAM from it's shared view?)

> 

> > It never actually *needs* to write to VGA,

> > so we do a hack and just skip these and then if that's the

> > only thing we need to track then we don't need to enable

> > its dirty tracking.

> >

> > I don't really know what is DIRTY_MEMORY_CODE and when it's set.

> 

> We use it softmmu do any pages that have code in them always force the

> slow-path into cputlb for writes to those pages. This allows us to

> detect self-modifying code. The kernel would never get involved but I

> don't think vhost and TCG is compatible anyway. I'm only really

> interested in vhost-user and it's interaction with TCG.

> 

> I'll spin a v2 now.

> 

> -- 

> Alex Bennée

> 

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Dr. David Alan Gilbert June 4, 2020, 1:26 p.m. UTC | #7
* Alex Bennée (alex.bennee@linaro.org) wrote:
> The purpose of vhost_section is to identify RAM regions that need to

> be made available to a vhost client. However when running under TCG

> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

> down the line. The original comment implies VGA regions are a problem

> but doesn't explain why vhost has a problem with it.

> 

> Re-factor the code so:

> 

>   - steps are clearer to follow

>   - reason for rejection is recorded in the trace point

>   - we allow DIRTY_MEMORY_CODE when TCG is enabled


The problem with VGA is that a VGA page can become mapped and unmapped
under the control of the guest, somewhere at a low address.  This tends
to break hugepage mappings.
For vhost-user, and in particular vhost-user-postcopy this means it
fails the mapping on the vhost-user client.

However the other problem is that with vhost-user, the vhost-user client
is changing memory; and won't mark the pages as dirty - except for
migration (I'm not clear if vhost kernel does this).
So TCG won't notice a page that's been changed by the driver; now in
most cases it's rare for a device to be writing directly into a page
you're going to execute out of, but it's not unknown.

So, as it is, any area that's expecting to get non-migration dirty
notifications is going to be disappointed by a vhost-user backend.

Dave

> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> Cc: Michael S. Tsirkin <mst@redhat.com>

> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

> Cc: Stefan Hajnoczi <stefanha@redhat.com>

> ---

>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

>  1 file changed, 32 insertions(+), 14 deletions(-)

> 

> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c

> index aff98a0ede5..f81fc87e74c 100644

> --- a/hw/virtio/vhost.c

> +++ b/hw/virtio/vhost.c

> @@ -27,6 +27,7 @@

>  #include "migration/blocker.h"

>  #include "migration/qemu-file-types.h"

>  #include "sysemu/dma.h"

> +#include "sysemu/tcg.h"

>  #include "trace.h"

>  

>  /* enabled until disconnected backend stabilizes */

> @@ -403,26 +404,43 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,

>      return r;

>  }

>  

> +/*

> + * vhost_section: identify sections needed for vhost access

> + *

> + * We only care about RAM sections here (where virtqueue can live). If

> + * we find one we still allow the backend to potentially filter it out

> + * of our list.

> + */

>  static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)

>  {

> -    bool result;

> -    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &

> -                     ~(1 << DIRTY_MEMORY_MIGRATION);

> -    result = memory_region_is_ram(section->mr) &&

> -        !memory_region_is_rom(section->mr);

> -

> -    /* Vhost doesn't handle any block which is doing dirty-tracking other

> -     * than migration; this typically fires on VGA areas.

> -     */

> -    result &= !log_dirty;

> +    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;

> +

> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

> +        uint8_t handled_dirty;

>  

> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

> -        result &=

> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

> +        /*

> +         * Vhost doesn't handle any block which is doing dirty-tracking other

> +         * than migration; this typically fires on VGA areas. However

> +         * for TCG we also do dirty code page tracking which shouldn't

> +         * get in the way.

> +         */

> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

> +        if (tcg_enabled()) {

> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

> +        }

> +        if (dirty_mask & ~handled_dirty) {

> +            result = DIRTY;

> +        } else if (dev->vhost_ops->vhost_backend_mem_section_filter &&

> +            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {

> +            result = FILTERED;

> +        } else {

> +            result = OK;

> +        }

>      }

>  

>      trace_vhost_section(section->mr->name, result);

> -    return result;

> +    return result == OK;

>  }

>  

>  static void vhost_begin(MemoryListener *listener)

> -- 

> 2.20.1

> 

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
Alex Bennée June 4, 2020, 1:50 p.m. UTC | #8
Philippe Mathieu-Daudé <philmd@redhat.com> writes:

> On 6/4/20 1:49 PM, Alex Bennée wrote:

>> 

>> Michael S. Tsirkin <mst@redhat.com> writes:

>> 

>>> On Thu, Jun 04, 2020 at 12:13:23PM +0100, Alex Bennée wrote:

>>>> The purpose of vhost_section is to identify RAM regions that need to

>>>> be made available to a vhost client. However when running under TCG

>>>> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

>>>> down the line. The original comment implies VGA regions are a problem

>>>> but doesn't explain why vhost has a problem with it.

>>>>

>>>> Re-factor the code so:

>>>>

>>>>   - steps are clearer to follow

>>>>   - reason for rejection is recorded in the trace point

>>>>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

>>>>

>>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>>>> Cc: Michael S. Tsirkin <mst@redhat.com>

>>>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

>>>> Cc: Stefan Hajnoczi <stefanha@redhat.com>

>>>> ---

>>>>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

>>>>  1 file changed, 32 insertions(+), 14 deletions(-)

> [...]

>>>> +

>>>> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

>>>> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

>>>> +        uint8_t handled_dirty;

>>>>  

>>>> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

>>>> -        result &=

>>>> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

>>>> +        /*

>>>> +         * Vhost doesn't handle any block which is doing dirty-tracking other

>>>> +         * than migration; this typically fires on VGA areas. However

>>>> +         * for TCG we also do dirty code page tracking which shouldn't

>>>> +         * get in the way.

>>>> +         */

>>>> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

>>>> +        if (tcg_enabled()) {

>>>> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

>>>> +        }

>>>

>>> So DIRTY_MEMORY_CODE is only set by TCG right? Thus I'm guessing

>>> we can just allow this unconditionally.

>> 

>> Which actually makes the test:

>> 

>>   if (dirty_mask & DIRTY_MEMORY_VGA) {

>

> Eh? Shouldn't this be "if (dirty_mask & (1 << DIRTY_MEMORY_VGA))"?


Yeah - that's what I meant... I've left it as the other form in v2
though.

>

>>      .. fail ..

>>   }

>> 

>> which is more in line with the comment although wouldn't fail if we

>> added additional DIRTY_MEMORY flags. This leads to the question what

>> exactly is it about DIRTY tracking that vhost doesn't like. Is it really

>> only avoiding having virtqueue in video RAM? Does this ever actually

>> happen?

>> 

>> I assume boards with unified memory models where video ram is shared

>> with system ram just end up partitioning the memory regions?

>> 



-- 
Alex Bennée
Alex Bennée June 4, 2020, 2:02 p.m. UTC | #9
Dr. David Alan Gilbert <dgilbert@redhat.com> writes:

> * Alex Bennée (alex.bennee@linaro.org) wrote:

>> The purpose of vhost_section is to identify RAM regions that need to

>> be made available to a vhost client. However when running under TCG

>> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

>> down the line. The original comment implies VGA regions are a problem

>> but doesn't explain why vhost has a problem with it.

>> 

>> Re-factor the code so:

>> 

>>   - steps are clearer to follow

>>   - reason for rejection is recorded in the trace point

>>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

>

> The problem with VGA is that a VGA page can become mapped and unmapped

> under the control of the guest; somewhere in a low address.  This tends

> to break hugepage mappings.

> For vhost-user, and in particular vhost-user-postcopy this means it

> fails the mapping on the vhost-user client.

>

> However the other problem is that with vhost-user, the vhost-user client

> is changing memory; and won't mark the pages as dirty - except for

> migration (I'm not clear if vhost kernel does this).


For virtio this shouldn't be a problem because whatever the vhost-user
client writes to should never be read by the guest until it gets kicked
by the client to signal the virtqueue is done.

I guess migration is a fairly moot point given I haven't seen anything
outside of a test declare VHOST_F_LOG_ALL support.

> So TCG won't notice a page that's been changed by the driver; now in

> most cases it's rare for a device to be writing directly into a page

> you're going to execute out of, but it's not unknown.


Not unknown outside of bugs?

So stage 2 of this exercise is limiting the amount of exposed RAM to the
client to just the virtqueues themselves (which is all vhost-user-rpmb
should need).

> So, as it is, any area that's expecting to get non-migration dirty

> notifications is going to be disappointed by a vhost-user backend.


It's not outside the realms of possibility that we could implement
feedback to the softmmu/migration information from a vhost-user client
but for now I think it's safe to assume we are glossing over the issue.

>

> Dave

>

>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

>> Cc: Michael S. Tsirkin <mst@redhat.com>

>> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

>> Cc: Stefan Hajnoczi <stefanha@redhat.com>

>> ---

>>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

>>  1 file changed, 32 insertions(+), 14 deletions(-)

>> 

>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c

>> index aff98a0ede5..f81fc87e74c 100644

>> --- a/hw/virtio/vhost.c

>> +++ b/hw/virtio/vhost.c

>> @@ -27,6 +27,7 @@

>>  #include "migration/blocker.h"

>>  #include "migration/qemu-file-types.h"

>>  #include "sysemu/dma.h"

>> +#include "sysemu/tcg.h"

>>  #include "trace.h"

>>  

>>  /* enabled until disconnected backend stabilizes */

>> @@ -403,26 +404,43 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,

>>      return r;

>>  }

>>  

>> +/*

>> + * vhost_section: identify sections needed for vhost access

>> + *

>> + * We only care about RAM sections here (where virtqueue can live). If

>> + * we find one we still allow the backend to potentially filter it out

>> + * of our list.

>> + */

>>  static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)

>>  {

>> -    bool result;

>> -    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &

>> -                     ~(1 << DIRTY_MEMORY_MIGRATION);

>> -    result = memory_region_is_ram(section->mr) &&

>> -        !memory_region_is_rom(section->mr);

>> -

>> -    /* Vhost doesn't handle any block which is doing dirty-tracking other

>> -     * than migration; this typically fires on VGA areas.

>> -     */

>> -    result &= !log_dirty;

>> +    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;

>> +

>> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

>> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

>> +        uint8_t handled_dirty;

>>  

>> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

>> -        result &=

>> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

>> +        /*

>> +         * Vhost doesn't handle any block which is doing dirty-tracking other

>> +         * than migration; this typically fires on VGA areas. However

>> +         * for TCG we also do dirty code page tracking which shouldn't

>> +         * get in the way.

>> +         */

>> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

>> +        if (tcg_enabled()) {

>> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

>> +        }

>> +        if (dirty_mask & ~handled_dirty) {

>> +            result = DIRTY;

>> +        } else if (dev->vhost_ops->vhost_backend_mem_section_filter &&

>> +            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {

>> +            result = FILTERED;

>> +        } else {

>> +            result = OK;

>> +        }

>>      }

>>  

>>      trace_vhost_section(section->mr->name, result);

>> -    return result;

>> +    return result == OK;

>>  }

>>  

>>  static void vhost_begin(MemoryListener *listener)

>> -- 

>> 2.20.1

>> 



-- 
Alex Bennée
Dr. David Alan Gilbert June 4, 2020, 2:29 p.m. UTC | #10
* Alex Bennée (alex.bennee@linaro.org) wrote:
> 

> Dr. David Alan Gilbert <dgilbert@redhat.com> writes:

> 

> > * Alex Bennée (alex.bennee@linaro.org) wrote:

> >> The purpose of vhost_section is to identify RAM regions that need to

> >> be made available to a vhost client. However when running under TCG

> >> all RAM sections have DIRTY_MEMORY_CODE set which leads to problems

> >> down the line. The original comment implies VGA regions are a problem

> >> but doesn't explain why vhost has a problem with it.

> >> 

> >> Re-factor the code so:

> >> 

> >>   - steps are clearer to follow

> >>   - reason for rejection is recorded in the trace point

> >>   - we allow DIRTY_MEMORY_CODE when TCG is enabled

> >

> > The problem with VGA is that a VGA page can become mapped and unmapped

> > under the control of the guest; somewhere in a low address.  This tends

> > to break hugepage mappings.

> > For vhost-user, and in particular vhost-user-postcopy this means it

> > fails the mapping on the vhost-user client.

> >

> > However the other problem is that with vhost-user, the vhost-user client

> > is changing memory; and won't mark the pages as dirty - except for

> > migration (I'm not clear if vhost kernel does this).

> 

> For virtio this shouldn't be a problem because whatever the vhost-user

> client writes to should never be read by the guest until it gets kicked

> by the client to signal the virtqueue is done.

> 

> I guess migration is a fairly moot point given I haven't seen anything

> outside of a test declare VHOST_F_LOG_ALL support.

> 

> > So TCG won't notice a page that's been changed by the driver; now in

> > most cases it's rare for a device to be writing directly into a page

> > you're going to execute out of, but it's not unknown.

> 

> Not unknown outside of bugs?


I can think of sane reasons, I just don't know if they normally happen.
For example, it would make sense to me for a boot loader or simple OS to
tell a virtio-blk device to write directly into an area of RAM and then
for the OS to jump into it as soon as it gets notified that the device
completes.
I'm assuming incoming data under Linux gets copied around before use
so that incoming data never gets immediately executed.

> So stage 2 of this exercise is limiting the amount of exposed RAM to the

> client to just the virtqueues themselves (which is all vhost-user-rpmb

> should need).

> 

> > So, as it is, any area that's expecting to get non-migration dirty

> > notifications is going to be disappointed by a vhost-user backend.

> 

> It's not outside the realms of possibility that we could implement

> feedback to the softmmu/migration information from a vhost-user client

> but for now I think it's safe to assume we are eliding over the issue.


Dave

> 

> >

> > Dave

> >

> >> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

> >> Cc: Michael S. Tsirkin <mst@redhat.com>

> >> Cc: Dr. David Alan Gilbert <dgilbert@redhat.com>

> >> Cc: Stefan Hajnoczi <stefanha@redhat.com>

> >> ---

> >>  hw/virtio/vhost.c | 46 ++++++++++++++++++++++++++++++++--------------

> >>  1 file changed, 32 insertions(+), 14 deletions(-)

> >> 

> >> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c

> >> index aff98a0ede5..f81fc87e74c 100644

> >> --- a/hw/virtio/vhost.c

> >> +++ b/hw/virtio/vhost.c

> >> @@ -27,6 +27,7 @@

> >>  #include "migration/blocker.h"

> >>  #include "migration/qemu-file-types.h"

> >>  #include "sysemu/dma.h"

> >> +#include "sysemu/tcg.h"

> >>  #include "trace.h"

> >>  

> >>  /* enabled until disconnected backend stabilizes */

> >> @@ -403,26 +404,43 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev,

> >>      return r;

> >>  }

> >>  

> >> +/*

> >> + * vhost_section: identify sections needed for vhost access

> >> + *

> >> + * We only care about RAM sections here (where virtqueue can live). If

> >> + * we find one we still allow the backend to potentially filter it out

> >> + * of our list.

> >> + */

> >>  static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)

> >>  {

> >> -    bool result;

> >> -    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &

> >> -                     ~(1 << DIRTY_MEMORY_MIGRATION);

> >> -    result = memory_region_is_ram(section->mr) &&

> >> -        !memory_region_is_rom(section->mr);

> >> -

> >> -    /* Vhost doesn't handle any block which is doing dirty-tracking other

> >> -     * than migration; this typically fires on VGA areas.

> >> -     */

> >> -    result &= !log_dirty;

> >> +    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;

> >> +

> >> +    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {

> >> +        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);

> >> +        uint8_t handled_dirty;

> >>  

> >> -    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {

> >> -        result &=

> >> -            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);

> >> +        /*

> >> +         * Vhost doesn't handle any block which is doing dirty-tracking other

> >> +         * than migration; this typically fires on VGA areas. However

> >> +         * for TCG we also do dirty code page tracking which shouldn't

> >> +         * get in the way.

> >> +         */

> >> +        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);

> >> +        if (tcg_enabled()) {

> >> +            handled_dirty |= (1 << DIRTY_MEMORY_CODE);

> >> +        }

> >> +        if (dirty_mask & ~handled_dirty) {

> >> +            result = DIRTY;

> >> +        } else if (dev->vhost_ops->vhost_backend_mem_section_filter &&

> >> +            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {

> >> +            result = FILTERED;

> >> +        } else {

> >> +            result = OK;

> >> +        }

> >>      }

> >>  

> >>      trace_vhost_section(section->mr->name, result);

> >> -    return result;

> >> +    return result == OK;

> >>  }

> >>  

> >>  static void vhost_begin(MemoryListener *listener)

> >> -- 

> >> 2.20.1

> >> 

> 

> 

> -- 

> Alex Bennée

> 

--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
diff mbox series

Patch

diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index aff98a0ede5..f81fc87e74c 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -27,6 +27,7 @@ 
 #include "migration/blocker.h"
 #include "migration/qemu-file-types.h"
 #include "sysemu/dma.h"
+#include "sysemu/tcg.h"
 #include "trace.h"
 
 /* enabled until disconnected backend stabilizes */
@@ -403,26 +404,43 @@  static int vhost_verify_ring_mappings(struct vhost_dev *dev,
     return r;
 }
 
+/*
+ * vhost_section: identify sections needed for vhost access
+ *
+ * We only care about RAM sections here (where virtqueue can live). If
+ * we find one we still allow the backend to potentially filter it out
+ * of our list.
+ */
 static bool vhost_section(struct vhost_dev *dev, MemoryRegionSection *section)
 {
-    bool result;
-    bool log_dirty = memory_region_get_dirty_log_mask(section->mr) &
-                     ~(1 << DIRTY_MEMORY_MIGRATION);
-    result = memory_region_is_ram(section->mr) &&
-        !memory_region_is_rom(section->mr);
-
-    /* Vhost doesn't handle any block which is doing dirty-tracking other
-     * than migration; this typically fires on VGA areas.
-     */
-    result &= !log_dirty;
+    enum { OK = 0, NOT_RAM, DIRTY, FILTERED } result = NOT_RAM;
+
+    if (memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr)) {
+        uint8_t dirty_mask = memory_region_get_dirty_log_mask(section->mr);
+        uint8_t handled_dirty;
 
-    if (result && dev->vhost_ops->vhost_backend_mem_section_filter) {
-        result &=
-            dev->vhost_ops->vhost_backend_mem_section_filter(dev, section);
+        /*
+         * Vhost doesn't handle any block which is doing dirty-tracking other
+         * than migration; this typically fires on VGA areas. However
+         * for TCG we also do dirty code page tracking which shouldn't
+         * get in the way.
+         */
+        handled_dirty = (1 << DIRTY_MEMORY_MIGRATION);
+        if (tcg_enabled()) {
+            handled_dirty |= (1 << DIRTY_MEMORY_CODE);
+        }
+        if (dirty_mask & ~handled_dirty) {
+            result = DIRTY;
+        } else if (dev->vhost_ops->vhost_backend_mem_section_filter &&
+            !dev->vhost_ops->vhost_backend_mem_section_filter(dev, section)) {
+            result = FILTERED;
+        } else {
+            result = OK;
+        }
     }
 
     trace_vhost_section(section->mr->name, result);
-    return result;
+    return result == OK;
 }
 
 static void vhost_begin(MemoryListener *listener)