diff mbox series

[v12,08/18] KVM: guest_memfd: Allow host to map guest_memfd pages

Message ID 20250611133330.1514028-9-tabba@google.com
State New
Headers show
Series KVM: Mapping guest_memfd backed memory at the host for software protected VMs | expand

Commit Message

Fuad Tabba June 11, 2025, 1:33 p.m. UTC
This patch enables support for shared memory in guest_memfd, including
mapping that memory from host userspace.

This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
flag at creation time.

Reviewed-by: Gavin Shan <gshan@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Co-developed-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Ackerley Tng <ackerleytng@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
 include/linux/kvm_host.h | 13 +++++++
 include/uapi/linux/kvm.h |  1 +
 virt/kvm/Kconfig         |  4 +++
 virt/kvm/guest_memfd.c   | 73 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 91 insertions(+)

Comments

Shivank Garg June 12, 2025, 4:16 p.m. UTC | #1
On 6/11/2025 7:03 PM, Fuad Tabba wrote:
> This patch enables support for shared memory in guest_memfd, including
> mapping that memory from host userspace.
> 
> This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> flag at creation time.
> 
> Reviewed-by: Gavin Shan <gshan@redhat.com>
> Acked-by: David Hildenbrand <david@redhat.com>
> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  include/linux/kvm_host.h | 13 +++++++
>  include/uapi/linux/kvm.h |  1 +
>  virt/kvm/Kconfig         |  4 +++
>  virt/kvm/guest_memfd.c   | 73 ++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 91 insertions(+)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 9a6712151a74..6b63556ca150 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -729,6 +729,19 @@ static inline bool kvm_arch_supports_gmem(struct kvm *kvm)
>  }
>  #endif
>  
> +/*
> + * Returns true if this VM supports shared mem in guest_memfd.
> + *
> + * Arch code must define kvm_arch_supports_gmem_shared_mem if support for
> + * guest_memfd is enabled.
> + */
> +#if !defined(kvm_arch_supports_gmem_shared_mem)
> +static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm)
> +{
> +	return false;
> +}
> +#endif
> +
>  #ifndef kvm_arch_has_readonly_mem
>  static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
>  {
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index d00b85cb168c..cb19150fd595 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
>  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
>  
>  #define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED	(1ULL << 0)
>  
>  struct kvm_create_guest_memfd {
>  	__u64 size;
> diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> index 559c93ad90be..e90884f74404 100644
> --- a/virt/kvm/Kconfig
> +++ b/virt/kvm/Kconfig
> @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
>  config HAVE_KVM_ARCH_GMEM_INVALIDATE
>         bool
>         depends on KVM_GMEM
> +
> +config KVM_GMEM_SHARED_MEM
> +       select KVM_GMEM
> +       bool
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 6db515833f61..06616b6b493b 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
>  	return gfn - slot->base_gfn + slot->gmem.pgoff;
>  }
>  
> +static bool kvm_gmem_supports_shared(struct inode *inode)
> +{
> +	const u64 flags = (u64)inode->i_private;
> +
> +	if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
> +		return false;
> +
> +	return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> +}
> +
> +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
> +{
> +	struct inode *inode = file_inode(vmf->vma->vm_file);
> +	struct folio *folio;
> +	vm_fault_t ret = VM_FAULT_LOCKED;
> +
> +	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
> +		return VM_FAULT_SIGBUS;
> +
> +	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> +	if (IS_ERR(folio)) {
> +		int err = PTR_ERR(folio);
> +
> +		if (err == -EAGAIN)
> +			return VM_FAULT_RETRY;
> +
> +		return vmf_error(err);
> +	}
> +
> +	if (WARN_ON_ONCE(folio_test_large(folio))) {
> +		ret = VM_FAULT_SIGBUS;
> +		goto out_folio;
> +	}
> +
> +	if (!folio_test_uptodate(folio)) {
> +		clear_highpage(folio_page(folio, 0));
> +		kvm_gmem_mark_prepared(folio);
> +	}
> +
> +	vmf->page = folio_file_page(folio, vmf->pgoff);
> +
> +out_folio:
> +	if (ret != VM_FAULT_LOCKED) {
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
> +
> +	return ret;
> +}
> +
> +static const struct vm_operations_struct kvm_gmem_vm_ops = {
> +	.fault = kvm_gmem_fault_shared,
> +};
> +
> +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	if (!kvm_gmem_supports_shared(file_inode(file)))
> +		return -ENODEV;
> +
> +	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
> +	    (VM_SHARED | VM_MAYSHARE)) {
> +		return -EINVAL;
> +	}
> +
> +	vma->vm_ops = &kvm_gmem_vm_ops;
> +
> +	return 0;
> +}
> +
>  static struct file_operations kvm_gmem_fops = {
> +	.mmap		= kvm_gmem_mmap,
>  	.open		= generic_file_open,
>  	.release	= kvm_gmem_release,
>  	.fallocate	= kvm_gmem_fallocate,
> @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
>  	u64 flags = args->flags;
>  	u64 valid_flags = 0;
>  
> +	if (kvm_arch_supports_gmem_shared_mem(kvm))
> +		valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> +
>  	if (flags & ~valid_flags)
>  		return -EINVAL;
>  

LGTM!

Reviewed-by: Shivank Garg <shivankg@amd.com>

Thanks,
Shivank
Fuad Tabba June 16, 2025, 6:52 a.m. UTC | #2
Hi Sean,

On Fri, 13 Jun 2025 at 22:03, Sean Christopherson <seanjc@google.com> wrote:
>
> On Wed, Jun 11, 2025, Fuad Tabba wrote:
> > This patch enables support for shared memory in guest_memfd, including
>
> Please don't lead with with "This patch", simply state what changes are being
> made as a command.

Ack.

> > mapping that memory from host userspace.
>
> > This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> > and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> > flag at creation time.
>
> Why?  I can see that from the patch.

It's in the patch series, not this patch. Would it help if I rephrase
it along the lines of:

This functionality isn't enabled until the introduction of the
KVM_GMEM_SHARED_MEM Kconfig option, and enabled for a given instance
by the GUEST_MEMFD_FLAG_SUPPORT_SHARED flag at creation time. Both of
which are introduced in a subsequent patch.

> This changelog is way, way, waaay too light on details.  Sorry for jumping in at
> the 11th hour, but we've spent what, 2 years working on this?

I'll expand this. Just to make sure that I include the right details,
are you looking for implementation details, motivation, use cases?

> > Reviewed-by: Gavin Shan <gshan@redhat.com>
> > Acked-by: David Hildenbrand <david@redhat.com>
> > Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index d00b85cb168c..cb19150fd595 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
> >  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
> >
> >  #define KVM_CREATE_GUEST_MEMFD       _IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> > +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED      (1ULL << 0)
>
> I find the SUPPORT_SHARED terminology to be super confusing.  I had to dig quite
> deep to undesrtand that "support shared" actually mean "userspace explicitly
> enable sharing on _this_ guest_memfd instance".  E.g. I was surprised to see
>
> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate.  But even that is
> weird to me.  For non-CoCo VMs, there is no concept of shared vs. private.  What's
> novel and notable is that the memory is _mappable_.  Yeah, yeah, pKVM's use case
> is to share memory, but that's a _use case_, not the property of guest_memfd that
> is being controlled by userspace.
>
> And kvm_gmem_memslot_supports_shared() is even worse.  It's simply that the
> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd
> instance is the _only_ entry point to the memslot.
>
> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like
> KVM_MEMSLOT_GUEST_MEMFD_ONLY.  That will make code like this:
>
>         if (kvm_slot_has_gmem(slot) &&
>             (kvm_gmem_memslot_supports_shared(slot) ||
>              kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
>                 return kvm_gmem_max_mapping_level(slot, gfn, max_level);
>         }
>
> much more intutive:
>
>         if (kvm_is_memslot_gmem_only(slot) ||
>             kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE))
>                 return kvm_gmem_max_mapping_level(slot, gfn, max_level);
>
> And then have kvm_gmem_mapping_order() do:
>
>         WARN_ON_ONCE(!kvm_slot_has_gmem(slot));
>         return 0;

I have no preference really. To me this was intuitive, but I guess I
have been staring at this way too long. If you and all the
stakeholders are happy with your suggested changes, then I am happy
making them :)


> >  struct kvm_create_guest_memfd {
> >       __u64 size;
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 559c93ad90be..e90884f74404 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
> >  config HAVE_KVM_ARCH_GMEM_INVALIDATE
> >         bool
> >         depends on KVM_GMEM
> > +
> > +config KVM_GMEM_SHARED_MEM
> > +       select KVM_GMEM
> > +       bool
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index 6db515833f61..06616b6b493b 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >       return gfn - slot->base_gfn + slot->gmem.pgoff;
> >  }
> >
> > +static bool kvm_gmem_supports_shared(struct inode *inode)
> > +{
> > +     const u64 flags = (u64)inode->i_private;
> > +
> > +     if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
> > +             return false;
> > +
> > +     return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> > +}
> > +
> > +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
>
> And to my point about "shared", this is also very confusing, because there are
> zero checks in here about shared vs. private.

As you noted in a later email, it was you who suggested this name, but
like I said, I am happy to change it.

> > +{
> > +     struct inode *inode = file_inode(vmf->vma->vm_file);
> > +     struct folio *folio;
> > +     vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > +     if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
> > +             return VM_FAULT_SIGBUS;
> > +
> > +     folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > +     if (IS_ERR(folio)) {
> > +             int err = PTR_ERR(folio);
> > +
> > +             if (err == -EAGAIN)
> > +                     return VM_FAULT_RETRY;
> > +
> > +             return vmf_error(err);
> > +     }
> > +
> > +     if (WARN_ON_ONCE(folio_test_large(folio))) {
> > +             ret = VM_FAULT_SIGBUS;
> > +             goto out_folio;
> > +     }
> > +
> > +     if (!folio_test_uptodate(folio)) {
> > +             clear_highpage(folio_page(folio, 0));
> > +             kvm_gmem_mark_prepared(folio);
> > +     }
> > +
> > +     vmf->page = folio_file_page(folio, vmf->pgoff);
> > +
> > +out_folio:
> > +     if (ret != VM_FAULT_LOCKED) {
> > +             folio_unlock(folio);
> > +             folio_put(folio);
> > +     }
> > +
> > +     return ret;
> > +}
> > +
> > +static const struct vm_operations_struct kvm_gmem_vm_ops = {
> > +     .fault = kvm_gmem_fault_shared,
> > +};
> > +
> > +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +     if (!kvm_gmem_supports_shared(file_inode(file)))
> > +             return -ENODEV;
> > +
> > +     if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
> > +         (VM_SHARED | VM_MAYSHARE)) {
>
> And the SHARED terminology gets really confusing here, due to colliding with the
> existing notion of SHARED file mappings.

Ack.

Before I respin, let's make sure we're all on the same page in terms
of terminology. Hopefully David can chime in again now that he's had
the weekend to ponder over the latest exchange :)

Thanks,
/fuad

> > +             return -EINVAL;
> > +     }
> > +
> > +     vma->vm_ops = &kvm_gmem_vm_ops;
> > +
> > +     return 0;
> > +}
> > +
> >  static struct file_operations kvm_gmem_fops = {
> > +     .mmap           = kvm_gmem_mmap,
> >       .open           = generic_file_open,
> >       .release        = kvm_gmem_release,
> >       .fallocate      = kvm_gmem_fallocate,
> > @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
> >       u64 flags = args->flags;
> >       u64 valid_flags = 0;
> >
> > +     if (kvm_arch_supports_gmem_shared_mem(kvm))
> > +             valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> > +
> >       if (flags & ~valid_flags)
> >               return -EINVAL;
> >
> > --
> > 2.50.0.rc0.642.g800a2b2222-goog
> >
Ira Weiny June 16, 2025, 1:44 p.m. UTC | #3
Sean Christopherson wrote:
> On Wed, Jun 11, 2025, Fuad Tabba wrote:
> > This patch enables support for shared memory in guest_memfd, including
> 
> Please don't lead with with "This patch", simply state what changes are being
> made as a command.
> 
> > mapping that memory from host userspace.
> 
> > This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> > and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> > flag at creation time.
> 
> Why?  I can see that from the patch.
> 
> This changelog is way, way, waaay too light on details.  Sorry for jumping in at
> the 11th hour, but we've spent what, 2 years working on this? 
> 
> > Reviewed-by: Gavin Shan <gshan@redhat.com>
> > Acked-by: David Hildenbrand <david@redhat.com>
> > Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index d00b85cb168c..cb19150fd595 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
> >  #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
> >  
> >  #define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> > +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED	(1ULL << 0)
> 
> I find the SUPPORT_SHARED terminology to be super confusing.  I had to dig quite
> deep to undesrtand that "support shared" actually mean "userspace explicitly
> enable sharing on _this_ guest_memfd instance".  E.g. I was surprised to see
> 
> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate.  But even that is
> weird to me.  For non-CoCo VMs, there is no concept of shared vs. private.  What's
> novel and notable is that the memory is _mappable_.  Yeah, yeah, pKVM's use case
> is to share memory, but that's a _use case_, not the property of guest_memfd that
> is being controlled by userspace.
> 
> And kvm_gmem_memslot_supports_shared() is even worse.  It's simply that the
> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd
> instance is the _only_ entry point to the memslot.
> 
> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like

If we are going to change this; FLAG_MAPPABLE is not clear to me either.
The guest can map private memory, right?  I see your point about shared
being overloaded with file shared but it would not be the first time a
term is overloaded.  kvm_slot_has_gmem() does makes a lot of sense.

If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE?

Ira

> KVM_MEMSLOT_GUEST_MEMFD_ONLY.  That will make code like this:
> 
> 	if (kvm_slot_has_gmem(slot) &&
> 	    (kvm_gmem_memslot_supports_shared(slot) ||
> 	     kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
> 		return kvm_gmem_max_mapping_level(slot, gfn, max_level);
> 	}
> 
> much more intutive:
> 
> 	if (kvm_is_memslot_gmem_only(slot) ||
> 	    kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE))
> 		return kvm_gmem_max_mapping_level(slot, gfn, max_level);
> 
> And then have kvm_gmem_mapping_order() do:
> 
> 	WARN_ON_ONCE(!kvm_slot_has_gmem(slot));
> 	return 0;
> 
> >  struct kvm_create_guest_memfd {
> >  	__u64 size;
> > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
> > index 559c93ad90be..e90884f74404 100644
> > --- a/virt/kvm/Kconfig
> > +++ b/virt/kvm/Kconfig
> > @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE
> >  config HAVE_KVM_ARCH_GMEM_INVALIDATE
> >         bool
> >         depends on KVM_GMEM
> > +
> > +config KVM_GMEM_SHARED_MEM
> > +       select KVM_GMEM
> > +       bool
> > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> > index 6db515833f61..06616b6b493b 100644
> > --- a/virt/kvm/guest_memfd.c
> > +++ b/virt/kvm/guest_memfd.c
> > @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
> >  	return gfn - slot->base_gfn + slot->gmem.pgoff;
> >  }
> >  
> > +static bool kvm_gmem_supports_shared(struct inode *inode)
> > +{
> > +	const u64 flags = (u64)inode->i_private;
> > +
> > +	if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
> > +		return false;
> > +
> > +	return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> > +}
> > +
> > +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
> 
> And to my point about "shared", this is also very confusing, because there are
> zero checks in here about shared vs. private.
> 
> > +{
> > +	struct inode *inode = file_inode(vmf->vma->vm_file);
> > +	struct folio *folio;
> > +	vm_fault_t ret = VM_FAULT_LOCKED;
> > +
> > +	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
> > +		return VM_FAULT_SIGBUS;
> > +
> > +	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
> > +	if (IS_ERR(folio)) {
> > +		int err = PTR_ERR(folio);
> > +
> > +		if (err == -EAGAIN)
> > +			return VM_FAULT_RETRY;
> > +
> > +		return vmf_error(err);
> > +	}
> > +
> > +	if (WARN_ON_ONCE(folio_test_large(folio))) {
> > +		ret = VM_FAULT_SIGBUS;
> > +		goto out_folio;
> > +	}
> > +
> > +	if (!folio_test_uptodate(folio)) {
> > +		clear_highpage(folio_page(folio, 0));
> > +		kvm_gmem_mark_prepared(folio);
> > +	}
> > +
> > +	vmf->page = folio_file_page(folio, vmf->pgoff);
> > +
> > +out_folio:
> > +	if (ret != VM_FAULT_LOCKED) {
> > +		folio_unlock(folio);
> > +		folio_put(folio);
> > +	}
> > +
> > +	return ret;
> > +}
> > +
> > +static const struct vm_operations_struct kvm_gmem_vm_ops = {
> > +	.fault = kvm_gmem_fault_shared,
> > +};
> > +
> > +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> > +{
> > +	if (!kvm_gmem_supports_shared(file_inode(file)))
> > +		return -ENODEV;
> > +
> > +	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
> > +	    (VM_SHARED | VM_MAYSHARE)) {
> 
> And the SHARED terminology gets really confusing here, due to colliding with the
> existing notion of SHARED file mappings.
> 
> > +		return -EINVAL;
> > +	}
> > +
> > +	vma->vm_ops = &kvm_gmem_vm_ops;
> > +
> > +	return 0;
> > +}
> > +
> >  static struct file_operations kvm_gmem_fops = {
> > +	.mmap		= kvm_gmem_mmap,
> >  	.open		= generic_file_open,
> >  	.release	= kvm_gmem_release,
> >  	.fallocate	= kvm_gmem_fallocate,
> > @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
> >  	u64 flags = args->flags;
> >  	u64 valid_flags = 0;
> >  
> > +	if (kvm_arch_supports_gmem_shared_mem(kvm))
> > +		valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
> > +
> >  	if (flags & ~valid_flags)
> >  		return -EINVAL;
> >  
> > -- 
> > 2.50.0.rc0.642.g800a2b2222-goog
> >
Fuad Tabba June 16, 2025, 2:16 p.m. UTC | #4
On Mon, 16 Jun 2025 at 15:03, David Hildenbrand <david@redhat.com> wrote:
>
> On 16.06.25 15:44, Ira Weiny wrote:
> > Sean Christopherson wrote:
> >> On Wed, Jun 11, 2025, Fuad Tabba wrote:
> >>> This patch enables support for shared memory in guest_memfd, including
> >>
> >> Please don't lead with with "This patch", simply state what changes are being
> >> made as a command.
> >>
> >>> mapping that memory from host userspace.
> >>
> >>> This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> >>> and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> >>> flag at creation time.
> >>
> >> Why?  I can see that from the patch.
> >>
> >> This changelog is way, way, waaay too light on details.  Sorry for jumping in at
> >> the 11th hour, but we've spent what, 2 years working on this?
> >>
> >>> Reviewed-by: Gavin Shan <gshan@redhat.com>
> >>> Acked-by: David Hildenbrand <david@redhat.com>
> >>> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
> >>> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
> >>> Signed-off-by: Fuad Tabba <tabba@google.com>
> >>> ---
> >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >>> index d00b85cb168c..cb19150fd595 100644
> >>> --- a/include/uapi/linux/kvm.h
> >>> +++ b/include/uapi/linux/kvm.h
> >>> @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
> >>>   #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
> >>>
> >>>   #define KVM_CREATE_GUEST_MEMFD    _IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
> >>> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED    (1ULL << 0)
> >>
> >> I find the SUPPORT_SHARED terminology to be super confusing.  I had to dig quite
> >> deep to undesrtand that "support shared" actually mean "userspace explicitly
> >> enable sharing on _this_ guest_memfd instance".  E.g. I was surprised to see
> >>
> >> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate.  But even that is
> >> weird to me.  For non-CoCo VMs, there is no concept of shared vs. private.  What's
> >> novel and notable is that the memory is _mappable_.  Yeah, yeah, pKVM's use case
> >> is to share memory, but that's a _use case_, not the property of guest_memfd that
> >> is being controlled by userspace.
> >>
> >> And kvm_gmem_memslot_supports_shared() is even worse.  It's simply that the
> >> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd
> >> instance is the _only_ entry point to the memslot.
> >>
> >> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like
> >
> > If we are going to change this; FLAG_MAPPABLE is not clear to me either.
> > The guest can map private memory, right?  I see your point about shared
> > being overloaded with file shared but it would not be the first time a
> > term is overloaded.  kvm_slot_has_gmem() does makes a lot of sense.
> >
> > If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE?
>
> If "shared" is not good enough terminology ...
>
> ... can we please just find a way to name what this "non-private" memory
> is called? That something is mappable into $whatever is not the right
> way to look at this IMHO. As raised in the past, we can easily support
> read()/write()/etc to this non-private memory.
>
>
> I'll note, the "non-private" memory in guest-memfd behaves just like ...
> the "shared" memory in shmem ... well, or like other memory in memfd.
> (which is based on mm/shmem.c).
>
> "Private" is also not the best way to describe the "protected\encrypted"
> memory, but that ship has sailed with KVM_MEMORY_ATTRIBUTE_PRIVATE.
>
> I'll further note that in the doc of KVM_SET_USER_MEMORY_REGION2 we talk
> about "private" vs "shared" memory ... so that would have to be improved
> as well.

To add to what David just wrote, V1 of this series used the term
"mappable" [1].  After a few discussions, I thought the consensus was
that "shared" was a more accurate description --- i.e., mappability
was a side effect of it being shared with the host.

One could argue that non-CoCo VMs have no concept of "shared" vs
"private". A different way of looking at it is, non-CoCo VMs have
their state as shared by default.

I don't have a strong opinion. What would be good if we could agree on
the terminology before I respin this.

Thanks,
/fuad


[1] https://lore.kernel.org/all/20250122152738.1173160-4-tabba@google.com/


> --
> Cheers,
>
> David / dhildenb
>
David Hildenbrand June 16, 2025, 2:25 p.m. UTC | #5
On 16.06.25 16:16, Fuad Tabba wrote:
> On Mon, 16 Jun 2025 at 15:03, David Hildenbrand <david@redhat.com> wrote:
>>
>> On 16.06.25 15:44, Ira Weiny wrote:
>>> Sean Christopherson wrote:
>>>> On Wed, Jun 11, 2025, Fuad Tabba wrote:
>>>>> This patch enables support for shared memory in guest_memfd, including
>>>>
>>>> Please don't lead with with "This patch", simply state what changes are being
>>>> made as a command.
>>>>
>>>>> mapping that memory from host userspace.
>>>>
>>>>> This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
>>>>> and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
>>>>> flag at creation time.
>>>>
>>>> Why?  I can see that from the patch.
>>>>
>>>> This changelog is way, way, waaay too light on details.  Sorry for jumping in at
>>>> the 11th hour, but we've spent what, 2 years working on this?
>>>>
>>>>> Reviewed-by: Gavin Shan <gshan@redhat.com>
>>>>> Acked-by: David Hildenbrand <david@redhat.com>
>>>>> Co-developed-by: Ackerley Tng <ackerleytng@google.com>
>>>>> Signed-off-by: Ackerley Tng <ackerleytng@google.com>
>>>>> Signed-off-by: Fuad Tabba <tabba@google.com>
>>>>> ---
>>>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
>>>>> index d00b85cb168c..cb19150fd595 100644
>>>>> --- a/include/uapi/linux/kvm.h
>>>>> +++ b/include/uapi/linux/kvm.h
>>>>> @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes {
>>>>>    #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
>>>>>
>>>>>    #define KVM_CREATE_GUEST_MEMFD    _IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
>>>>> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED    (1ULL << 0)
>>>>
>>>> I find the SUPPORT_SHARED terminology to be super confusing.  I had to dig quite
>>>> deep to undesrtand that "support shared" actually mean "userspace explicitly
>>>> enable sharing on _this_ guest_memfd instance".  E.g. I was surprised to see
>>>>
>>>> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate.  But even that is
>>>> weird to me.  For non-CoCo VMs, there is no concept of shared vs. private.  What's
>>>> novel and notable is that the memory is _mappable_.  Yeah, yeah, pKVM's use case
>>>> is to share memory, but that's a _use case_, not the property of guest_memfd that
>>>> is being controlled by userspace.
>>>>
>>>> And kvm_gmem_memslot_supports_shared() is even worse.  It's simply that the
>>>> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd
>>>> instance is the _only_ entry point to the memslot.
>>>>
>>>> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like
>>>
>>> If we are going to change this; FLAG_MAPPABLE is not clear to me either.
>>> The guest can map private memory, right?  I see your point about shared
>>> being overloaded with file shared but it would not be the first time a
>>> term is overloaded.  kvm_slot_has_gmem() does makes a lot of sense.
>>>
>>> If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE?
>>
>> If "shared" is not good enough terminology ...
>>
>> ... can we please just find a way to name what this "non-private" memory
>> is called? That something is mappable into $whatever is not the right
>> way to look at this IMHO. As raised in the past, we can easily support
>> read()/write()/etc to this non-private memory.
>>
>>
>> I'll note, the "non-private" memory in guest-memfd behaves just like ...
>> the "shared" memory in shmem ... well, or like other memory in memfd.
>> (which is based on mm/shmem.c).
>>
>> "Private" is also not the best way to describe the "protected\encrypted"
>> memory, but that ship has sailed with KVM_MEMORY_ATTRIBUTE_PRIVATE.
>>
>> I'll further note that in the doc of KVM_SET_USER_MEMORY_REGION2 we talk
>> about "private" vs "shared" memory ... so that would have to be improved
>> as well.
> 
> To add to what David just wrote, V1 of this series used the term
> "mappable" [1].  After a few discussions, I thought the consensus was
> that "shared" was a more accurate description --- i.e., mappability
> was a side effect of it being shared with the host.
> 
> One could argue that non-CoCo VMs have no concept of "shared" vs
> "private". A different way of looking at it is, non-CoCo VMs have
> their state as shared by default.

All memory of these VMs behaves similar to other memory-based shared 
memory backends (memfd, shmem) in the system, yes. You can map it into 
multiple processes and use it like shmem/memfd.

I'm still thinking about another way to call non-private memory ... no 
success so far. "ordinary" or "generic" is .... not better.
Sean Christopherson June 17, 2025, 11:04 p.m. UTC | #6
On Mon, Jun 16, 2025, Fuad Tabba wrote:
> > > This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
> > > and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
> > > flag at creation time.
> >
> > Why?  I can see that from the patch.
> 
> It's in the patch series, not this patch.

Eh, not really.  It doesn't even matter how "Why?" is interpreted, because nothing
in this series covers any of the reasonable interpretations to an acceptable
degree.

These are all the changelogs for generic changes

 : This patch enables support for shared memory in guest_memfd, including
 : mapping that memory from host userspace.
 : 
 : This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option,
 : and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED
 : flag at creation time.

 : Add a new internal flag in the top half of memslot->flags to track when
 : a guest_memfd-backed slot supports shared memory, which is reserved for
 : internal use in KVM.
 : 
 : This avoids repeatedly checking the underlying guest_memfd file for
 : shared memory support, which requires taking a reference on the file.

the small bit of documentation

 +When the capability KVM_CAP_GMEM_SHARED_MEM is supported, the 'flags' field
 +supports GUEST_MEMFD_FLAG_SUPPORT_SHARED.  Setting this flag on guest_memfd
 +creation enables mmap() and faulting of guest_memfd memory to host userspace.
 +
 +When the KVM MMU performs a PFN lookup to service a guest fault and the backing
 +guest_memfd has the GUEST_MEMFD_FLAG_SUPPORT_SHARED set, then the fault will
 +always be consumed from guest_memfd, regardless of whether it is a shared or a
 +private fault.

and the cover letter

 : The purpose of this series is to allow mapping guest_memfd backed memory
 : at the host. This support enables VMMs like Firecracker to run guests
 : backed completely by guest_memfd [2]. Combined with Patrick's series for
 : direct map removal in guest_memfd [3], this would allow running VMs that
 : offer additional hardening against Spectre-like transient execution
 : attacks.
 : 
 : This series will also serve as a base for _restricted_ mmap() support
 : for guest_memfd backed memory at the host for CoCos that allow sharing
 : guest memory in-place with the host [4].

None of those get remotely close to explaining the use cases in sufficient
detail.

Now, it's entirely acceptable, and in this case probably highly preferred, to
link to the relevant use cases, e.g. as opposed to trying to regurgitate and
distill a huge pile of information.

But I want the _changelog_ to do the heavy lifting of capturing the most useful
links and providing context.  E.g. to find the the motiviation for using
guest_memfd to back non-CoCo VMs, I had to follow the [3] link to Patrick's
series, then walk backwards through the versions of _that_ series, and eventually
come across another link in Patrick's very first RFC:

 : This RFC series is a rough draft adding support for running
 : non-confidential compute VMs in guest_memfd, based on prior discussions
 : with Sean [1].

where [1] is the much more helpful:

  https://lore.kernel.org/linux-mm/cc1bb8e9bc3e1ab637700a4d3defeec95b55060a.camel@amazon.com

Now, _I_ am obviously aware of most/all of the use cases and motiviations, but
the changelog isn't just for people like me.  Far from it; the changelog is most
useful for people that are coming in with _zero_ knowledge and context.  Finding
the above link took me quite a bit of effort and digging (and to some extent, I
knew what I was looking for), whereas an explicit reference in the changelog
would (hopefully) take only the few seconds needed to read the blurb and click
the link.

My main argument for why you (and everyone else) should put significant effort
into changelogs (and comments and documentation!) is very simple: writing and
curating a good changelog (comment/documentation) is something the author does
*once*.  If the author skimps out on the changelog, then *every* reader is having
to do that same work *every* time they dig through this code.  We as a community
come out far, far ahead in terms of developer time and understanding by turning a
many-time cost into a one-time cost (and that's not even accounting for the fact
that the author's one-time cost will like be a _lot_ smaller).

There's obviously a balance to strike.  E.g. if the changelog has 50 links, that's
probably going to be counter-productive for most readers.  In this case, 5-7-ish
links with (very) brief contextual references is probably the sweet spot.

> Would it help if I rephrase it along the lines of:
> 
> This functionality isn't enabled until the introduction of the
> KVM_GMEM_SHARED_MEM Kconfig option, and enabled for a given instance
> by the GUEST_MEMFD_FLAG_SUPPORT_SHARED flag at creation time. Both of
> which are introduced in a subsequent patch.
> 
> > This changelog is way, way, waaay too light on details.  Sorry for jumping in at
> > the 11th hour, but we've spent what, 2 years working on this?
> 
> I'll expand this. Just to make sure that I include the right details,
> are you looking for implementation details, motivation, use cases?

Despite my lengthy response, none of the above?

Use cases are good fodder for Documentation and the cover letter, and for *brief*
references in the changelogs.  Implementation details generally don't need to be
explained in the changelog, modulo notable gotchas and edge cases that are worth
calling out.

I _am_ looking for the motivation, but I suspect it's not the motivation you have
in mind.  I'm not terribly concerned with why you want to implement this
functionality; that should be easy to glean from the Documentation and use case
links.

The motivation I'm looking for is why you're adding CONFIG_KVM_GMEM_SHARED_MEM
and GUEST_MEMFD_FLAG_SUPPORT_SHARED.

E.g. CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES was added because it gates large swaths
of code, uAPI, and a field we don't want to access "accidentally" (mem_attr_array),
and because CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES has a hard dependency on
CONFIG_KVM_GENERIC_MMU_NOTIFIER.

For CONFIG_KVM_GMEM_SHARED_MEM, I'm just not seeing the motiviation.   It gates
very little code (though that could be slightly changed by wrapping the mmap()
and fault logic guest_memfd.c), and literally every use is part of a broader
conditional.  I.e. it's effectively an optimization.

Ha!  And it's actively buggy.  Because this will allow shared gmem for DEFAULT_VM,

	#define kvm_arch_supports_gmem_shared_mem(kvm)			\
	(IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM) &&			\
	 ((kvm)->arch.vm_type == KVM_X86_SW_PROTECTED_VM ||		\
	  (kvm)->arch.vm_type == KVM_X86_DEFAULT_VM))

but only if CONFIG_KVM_SW_PROTECTED_VM is selected.  That makes no sense.  And
that changelog is also sorely lacking.  It covers the what, but that's quite
useless, because I can very easily see the what from the code.  By covering the
"why" in the changelog, (hopefully) you would have come to the same conclusion
that selecting KVM_GMEM_SHARED_MEM iff KVM_SW_PROTECTED_VM is enabled doesn't
make any sense (because you wouldn't have been able to write a sane justification).

Or, if it somehow does make sense, i.e. if I'm missing something, then that
absolutely needs to in the changelog!

 : Define the architecture-specific macro to enable shared memory support
 : in guest_memfd for ordinary, i.e., non-CoCo, VM types, specifically
 : KVM_X86_DEFAULT_VM and KVM_X86_SW_PROTECTED_VM.
 : 
 : Enable the KVM_GMEM_SHARED_MEM Kconfig option if KVM_SW_PROTECTED_VM is
 : enabled.


As for GUEST_MEMFD_FLAG_SUPPORT_SHARED, after digging through the code, I _think_
the reason we need a flag is so that KVM knows to completely ignore the HVA in
the memslot.  (a) explaining that (again, for future readers) would be super
helpful, and (b) if there is other motiviation for a per-guest_memfd opt-in, then
_that_ is also very interesting.

And for (a), bonus points if you explain why it's a GUEST_MEMFD flag, e.g. as
opposed to a per-VM capability or per-memslot flag.  (Though this may be self-
evident to any readers that understand any of this, so definitely optional).

> > So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like
> > KVM_MEMSLOT_GUEST_MEMFD_ONLY.  That will make code like this:
> >
> >         if (kvm_slot_has_gmem(slot) &&
> >             (kvm_gmem_memslot_supports_shared(slot) ||
> >              kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) {
> >                 return kvm_gmem_max_mapping_level(slot, gfn, max_level);
> >         }
> >
> > much more intutive:
> >
> >         if (kvm_is_memslot_gmem_only(slot) ||
> >             kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE))
> >                 return kvm_gmem_max_mapping_level(slot, gfn, max_level);
> >
> > And then have kvm_gmem_mapping_order() do:
> >
> >         WARN_ON_ONCE(!kvm_slot_has_gmem(slot));
> >         return 0;
> 
> I have no preference really. To me this was intuitive, but I guess I
> have been staring at this way too long.

I agree that SHARED is intuitive for the pKVM use case (and probably all CoCo use
cases).  My objection with the name is that it's misleading/confusing for non-CoCo
VMs (at least for me), and that using SHARED could unnecessarily paint us into a
corner.

Specifically, if there are ever use cases where guest memory is shared between
entities *without* mapping guest memory into host userspace, then we'll be a bit
hosed.  Though as is tradition in KVM, I suppose we could just call it
GUEST_MEMFD_FLAG_SUPPORT_SHARED2 ;-)

Regarding CoCo vs. non-CoCo intuition, it's easy enough to discern that
GUEST_MEMFD_FLAG_MAPPABLE is required to do in-place sharing with host userspace.

But IMO it's not easy to glean that GUEST_MEMFD_FLAG_SUPPORT_SHARED is a
effectively a hard requirement for non-CoCo x86 VMs purely because because many
flows in KVM x86 will fail miserable if KVM can't access guest memory via uaccess,
i.e. if guest memory isn't mapped by host userspace.  In other words, it's as much
about working within KVM's existing design (and not losing support for a wide
swath of features) as it is about "sharing" guest memory with host userspace.
diff mbox series

Patch

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9a6712151a74..6b63556ca150 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -729,6 +729,19 @@  static inline bool kvm_arch_supports_gmem(struct kvm *kvm)
 }
 #endif
 
+/*
+ * Returns true if this VM supports shared mem in guest_memfd.
+ *
+ * Arch code must define kvm_arch_supports_gmem_shared_mem if support for
+ * guest_memfd is enabled.
+ */
+#if !defined(kvm_arch_supports_gmem_shared_mem)
+static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm)
+{
+	return false;
+}
+#endif
+
 #ifndef kvm_arch_has_readonly_mem
 static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm)
 {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index d00b85cb168c..cb19150fd595 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1570,6 +1570,7 @@  struct kvm_memory_attributes {
 #define KVM_MEMORY_ATTRIBUTE_PRIVATE           (1ULL << 3)
 
 #define KVM_CREATE_GUEST_MEMFD	_IOWR(KVMIO,  0xd4, struct kvm_create_guest_memfd)
+#define GUEST_MEMFD_FLAG_SUPPORT_SHARED	(1ULL << 0)
 
 struct kvm_create_guest_memfd {
 	__u64 size;
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 559c93ad90be..e90884f74404 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -128,3 +128,7 @@  config HAVE_KVM_ARCH_GMEM_PREPARE
 config HAVE_KVM_ARCH_GMEM_INVALIDATE
        bool
        depends on KVM_GMEM
+
+config KVM_GMEM_SHARED_MEM
+       select KVM_GMEM
+       bool
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index 6db515833f61..06616b6b493b 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -312,7 +312,77 @@  static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn)
 	return gfn - slot->base_gfn + slot->gmem.pgoff;
 }
 
+static bool kvm_gmem_supports_shared(struct inode *inode)
+{
+	const u64 flags = (u64)inode->i_private;
+
+	if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM))
+		return false;
+
+	return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED;
+}
+
+static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf)
+{
+	struct inode *inode = file_inode(vmf->vma->vm_file);
+	struct folio *folio;
+	vm_fault_t ret = VM_FAULT_LOCKED;
+
+	if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
+		return VM_FAULT_SIGBUS;
+
+	folio = kvm_gmem_get_folio(inode, vmf->pgoff);
+	if (IS_ERR(folio)) {
+		int err = PTR_ERR(folio);
+
+		if (err == -EAGAIN)
+			return VM_FAULT_RETRY;
+
+		return vmf_error(err);
+	}
+
+	if (WARN_ON_ONCE(folio_test_large(folio))) {
+		ret = VM_FAULT_SIGBUS;
+		goto out_folio;
+	}
+
+	if (!folio_test_uptodate(folio)) {
+		clear_highpage(folio_page(folio, 0));
+		kvm_gmem_mark_prepared(folio);
+	}
+
+	vmf->page = folio_file_page(folio, vmf->pgoff);
+
+out_folio:
+	if (ret != VM_FAULT_LOCKED) {
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+
+	return ret;
+}
+
+static const struct vm_operations_struct kvm_gmem_vm_ops = {
+	.fault = kvm_gmem_fault_shared,
+};
+
+static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (!kvm_gmem_supports_shared(file_inode(file)))
+		return -ENODEV;
+
+	if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) !=
+	    (VM_SHARED | VM_MAYSHARE)) {
+		return -EINVAL;
+	}
+
+	vma->vm_ops = &kvm_gmem_vm_ops;
+
+	return 0;
+}
+
 static struct file_operations kvm_gmem_fops = {
+	.mmap		= kvm_gmem_mmap,
 	.open		= generic_file_open,
 	.release	= kvm_gmem_release,
 	.fallocate	= kvm_gmem_fallocate,
@@ -463,6 +533,9 @@  int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args)
 	u64 flags = args->flags;
 	u64 valid_flags = 0;
 
+	if (kvm_arch_supports_gmem_shared_mem(kvm))
+		valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED;
+
 	if (flags & ~valid_flags)
 		return -EINVAL;