Message ID | 20250611133330.1514028-9-tabba@google.com |
---|---|
State | New |
Headers | show |
Series | KVM: Mapping guest_memfd backed memory at the host for software protected VMs | expand |
On 6/11/2025 7:03 PM, Fuad Tabba wrote: > This patch enables support for shared memory in guest_memfd, including > mapping that memory from host userspace. > > This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option, > and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED > flag at creation time. > > Reviewed-by: Gavin Shan <gshan@redhat.com> > Acked-by: David Hildenbrand <david@redhat.com> > Co-developed-by: Ackerley Tng <ackerleytng@google.com> > Signed-off-by: Ackerley Tng <ackerleytng@google.com> > Signed-off-by: Fuad Tabba <tabba@google.com> > --- > include/linux/kvm_host.h | 13 +++++++ > include/uapi/linux/kvm.h | 1 + > virt/kvm/Kconfig | 4 +++ > virt/kvm/guest_memfd.c | 73 ++++++++++++++++++++++++++++++++++++++++ > 4 files changed, 91 insertions(+) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 9a6712151a74..6b63556ca150 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -729,6 +729,19 @@ static inline bool kvm_arch_supports_gmem(struct kvm *kvm) > } > #endif > > +/* > + * Returns true if this VM supports shared mem in guest_memfd. > + * > + * Arch code must define kvm_arch_supports_gmem_shared_mem if support for > + * guest_memfd is enabled. > + */ > +#if !defined(kvm_arch_supports_gmem_shared_mem) > +static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm) > +{ > + return false; > +} > +#endif > + > #ifndef kvm_arch_has_readonly_mem > static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) > { > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > index d00b85cb168c..cb19150fd595 100644 > --- a/include/uapi/linux/kvm.h > +++ b/include/uapi/linux/kvm.h > @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes { > #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) > > #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) > +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1ULL << 0) > > struct kvm_create_guest_memfd { > __u64 size; > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig > index 559c93ad90be..e90884f74404 100644 > --- a/virt/kvm/Kconfig > +++ b/virt/kvm/Kconfig > @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE > config HAVE_KVM_ARCH_GMEM_INVALIDATE > bool > depends on KVM_GMEM > + > +config KVM_GMEM_SHARED_MEM > + select KVM_GMEM > + bool > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > index 6db515833f61..06616b6b493b 100644 > --- a/virt/kvm/guest_memfd.c > +++ b/virt/kvm/guest_memfd.c > @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) > return gfn - slot->base_gfn + slot->gmem.pgoff; > } > > +static bool kvm_gmem_supports_shared(struct inode *inode) > +{ > + const u64 flags = (u64)inode->i_private; > + > + if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)) > + return false; > + > + return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED; > +} > + > +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) > +{ > + struct inode *inode = file_inode(vmf->vma->vm_file); > + struct folio *folio; > + vm_fault_t ret = VM_FAULT_LOCKED; > + > + if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) > + return VM_FAULT_SIGBUS; > + > + folio = kvm_gmem_get_folio(inode, vmf->pgoff); > + if (IS_ERR(folio)) { > + int err = PTR_ERR(folio); > + > + if (err == -EAGAIN) > + return VM_FAULT_RETRY; > + > + return vmf_error(err); > + } > + > + if (WARN_ON_ONCE(folio_test_large(folio))) { > + ret = VM_FAULT_SIGBUS; > + goto out_folio; > + } > + > + if (!folio_test_uptodate(folio)) { > + clear_highpage(folio_page(folio, 0)); > + kvm_gmem_mark_prepared(folio); > + } > + > + vmf->page = folio_file_page(folio, vmf->pgoff); > + > +out_folio: > + if (ret != VM_FAULT_LOCKED) { > + folio_unlock(folio); > + folio_put(folio); > + } > + > + return ret; > +} > + > +static const struct vm_operations_struct kvm_gmem_vm_ops = { > + .fault = kvm_gmem_fault_shared, > +}; > + > +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) > +{ > + if (!kvm_gmem_supports_shared(file_inode(file))) > + return -ENODEV; > + > + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != > + (VM_SHARED | VM_MAYSHARE)) { > + return -EINVAL; > + } > + > + vma->vm_ops = &kvm_gmem_vm_ops; > + > + return 0; > +} > + > static struct file_operations kvm_gmem_fops = { > + .mmap = kvm_gmem_mmap, > .open = generic_file_open, > .release = kvm_gmem_release, > .fallocate = kvm_gmem_fallocate, > @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) > u64 flags = args->flags; > u64 valid_flags = 0; > > + if (kvm_arch_supports_gmem_shared_mem(kvm)) > + valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED; > + > if (flags & ~valid_flags) > return -EINVAL; > LGTM! Reviewed-by: Shivank Garg <shivankg@amd.com> Thanks, Shivank
Hi Sean, On Fri, 13 Jun 2025 at 22:03, Sean Christopherson <seanjc@google.com> wrote: > > On Wed, Jun 11, 2025, Fuad Tabba wrote: > > This patch enables support for shared memory in guest_memfd, including > > Please don't lead with with "This patch", simply state what changes are being > made as a command. Ack. > > mapping that memory from host userspace. > > > This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option, > > and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED > > flag at creation time. > > Why? I can see that from the patch. It's in the patch series, not this patch. Would it help if I rephrase it along the lines of: This functionality isn't enabled until the introduction of the KVM_GMEM_SHARED_MEM Kconfig option, and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED flag at creation time. Both of which are introduced in a subsequent patch. > This changelog is way, way, waaay too light on details. Sorry for jumping in at > the 11th hour, but we've spent what, 2 years working on this? I'll expand this. Just to make sure that I include the right details, are you looking for implementation details, motivation, use cases? > > Reviewed-by: Gavin Shan <gshan@redhat.com> > > Acked-by: David Hildenbrand <david@redhat.com> > > Co-developed-by: Ackerley Tng <ackerleytng@google.com> > > Signed-off-by: Ackerley Tng <ackerleytng@google.com> > > Signed-off-by: Fuad Tabba <tabba@google.com> > > --- > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index d00b85cb168c..cb19150fd595 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes { > > #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) > > > > #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) > > +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1ULL << 0) > > I find the SUPPORT_SHARED terminology to be super confusing. I had to dig quite > deep to undesrtand that "support shared" actually mean "userspace explicitly > enable sharing on _this_ guest_memfd instance". E.g. I was surprised to see > > IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate. But even that is > weird to me. For non-CoCo VMs, there is no concept of shared vs. private. What's > novel and notable is that the memory is _mappable_. Yeah, yeah, pKVM's use case > is to share memory, but that's a _use case_, not the property of guest_memfd that > is being controlled by userspace. > > And kvm_gmem_memslot_supports_shared() is even worse. It's simply that the > memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd > instance is the _only_ entry point to the memslot. > > So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like > KVM_MEMSLOT_GUEST_MEMFD_ONLY. That will make code like this: > > if (kvm_slot_has_gmem(slot) && > (kvm_gmem_memslot_supports_shared(slot) || > kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) { > return kvm_gmem_max_mapping_level(slot, gfn, max_level); > } > > much more intutive: > > if (kvm_is_memslot_gmem_only(slot) || > kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) > return kvm_gmem_max_mapping_level(slot, gfn, max_level); > > And then have kvm_gmem_mapping_order() do: > > WARN_ON_ONCE(!kvm_slot_has_gmem(slot)); > return 0; I have no preference really. To me this was intuitive, but I guess I have been staring at this way too long. If you and all the stakeholders are happy with your suggested changes, then I am happy making them :) > > struct kvm_create_guest_memfd { > > __u64 size; > > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig > > index 559c93ad90be..e90884f74404 100644 > > --- a/virt/kvm/Kconfig > > +++ b/virt/kvm/Kconfig > > @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE > > config HAVE_KVM_ARCH_GMEM_INVALIDATE > > bool > > depends on KVM_GMEM > > + > > +config KVM_GMEM_SHARED_MEM > > + select KVM_GMEM > > + bool > > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > > index 6db515833f61..06616b6b493b 100644 > > --- a/virt/kvm/guest_memfd.c > > +++ b/virt/kvm/guest_memfd.c > > @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) > > return gfn - slot->base_gfn + slot->gmem.pgoff; > > } > > > > +static bool kvm_gmem_supports_shared(struct inode *inode) > > +{ > > + const u64 flags = (u64)inode->i_private; > > + > > + if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)) > > + return false; > > + > > + return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED; > > +} > > + > > +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) > > And to my point about "shared", this is also very confusing, because there are > zero checks in here about shared vs. private. As you noted in a later email, it was you who suggested this name, but like I said, I am happy to change it. > > +{ > > + struct inode *inode = file_inode(vmf->vma->vm_file); > > + struct folio *folio; > > + vm_fault_t ret = VM_FAULT_LOCKED; > > + > > + if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) > > + return VM_FAULT_SIGBUS; > > + > > + folio = kvm_gmem_get_folio(inode, vmf->pgoff); > > + if (IS_ERR(folio)) { > > + int err = PTR_ERR(folio); > > + > > + if (err == -EAGAIN) > > + return VM_FAULT_RETRY; > > + > > + return vmf_error(err); > > + } > > + > > + if (WARN_ON_ONCE(folio_test_large(folio))) { > > + ret = VM_FAULT_SIGBUS; > > + goto out_folio; > > + } > > + > > + if (!folio_test_uptodate(folio)) { > > + clear_highpage(folio_page(folio, 0)); > > + kvm_gmem_mark_prepared(folio); > > + } > > + > > + vmf->page = folio_file_page(folio, vmf->pgoff); > > + > > +out_folio: > > + if (ret != VM_FAULT_LOCKED) { > > + folio_unlock(folio); > > + folio_put(folio); > > + } > > + > > + return ret; > > +} > > + > > +static const struct vm_operations_struct kvm_gmem_vm_ops = { > > + .fault = kvm_gmem_fault_shared, > > +}; > > + > > +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) > > +{ > > + if (!kvm_gmem_supports_shared(file_inode(file))) > > + return -ENODEV; > > + > > + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != > > + (VM_SHARED | VM_MAYSHARE)) { > > And the SHARED terminology gets really confusing here, due to colliding with the > existing notion of SHARED file mappings. Ack. Before I respin, let's make sure we're all on the same page in terms of terminology. Hopefully David can chime in again now that he's had the weekend to ponder over the latest exchange :) Thanks, /fuad > > + return -EINVAL; > > + } > > + > > + vma->vm_ops = &kvm_gmem_vm_ops; > > + > > + return 0; > > +} > > + > > static struct file_operations kvm_gmem_fops = { > > + .mmap = kvm_gmem_mmap, > > .open = generic_file_open, > > .release = kvm_gmem_release, > > .fallocate = kvm_gmem_fallocate, > > @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) > > u64 flags = args->flags; > > u64 valid_flags = 0; > > > > + if (kvm_arch_supports_gmem_shared_mem(kvm)) > > + valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED; > > + > > if (flags & ~valid_flags) > > return -EINVAL; > > > > -- > > 2.50.0.rc0.642.g800a2b2222-goog > >
Sean Christopherson wrote: > On Wed, Jun 11, 2025, Fuad Tabba wrote: > > This patch enables support for shared memory in guest_memfd, including > > Please don't lead with with "This patch", simply state what changes are being > made as a command. > > > mapping that memory from host userspace. > > > This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option, > > and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED > > flag at creation time. > > Why? I can see that from the patch. > > This changelog is way, way, waaay too light on details. Sorry for jumping in at > the 11th hour, but we've spent what, 2 years working on this? > > > Reviewed-by: Gavin Shan <gshan@redhat.com> > > Acked-by: David Hildenbrand <david@redhat.com> > > Co-developed-by: Ackerley Tng <ackerleytng@google.com> > > Signed-off-by: Ackerley Tng <ackerleytng@google.com> > > Signed-off-by: Fuad Tabba <tabba@google.com> > > --- > > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > > index d00b85cb168c..cb19150fd595 100644 > > --- a/include/uapi/linux/kvm.h > > +++ b/include/uapi/linux/kvm.h > > @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes { > > #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) > > > > #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) > > +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1ULL << 0) > > I find the SUPPORT_SHARED terminology to be super confusing. I had to dig quite > deep to undesrtand that "support shared" actually mean "userspace explicitly > enable sharing on _this_ guest_memfd instance". E.g. I was surprised to see > > IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate. But even that is > weird to me. For non-CoCo VMs, there is no concept of shared vs. private. What's > novel and notable is that the memory is _mappable_. Yeah, yeah, pKVM's use case > is to share memory, but that's a _use case_, not the property of guest_memfd that > is being controlled by userspace. > > And kvm_gmem_memslot_supports_shared() is even worse. It's simply that the > memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd > instance is the _only_ entry point to the memslot. > > So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like If we are going to change this; FLAG_MAPPABLE is not clear to me either. The guest can map private memory, right? I see your point about shared being overloaded with file shared but it would not be the first time a term is overloaded. kvm_slot_has_gmem() does makes a lot of sense. If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE? Ira > KVM_MEMSLOT_GUEST_MEMFD_ONLY. That will make code like this: > > if (kvm_slot_has_gmem(slot) && > (kvm_gmem_memslot_supports_shared(slot) || > kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) { > return kvm_gmem_max_mapping_level(slot, gfn, max_level); > } > > much more intutive: > > if (kvm_is_memslot_gmem_only(slot) || > kvm_get_memory_attributes(kvm, gfn) & KVM_MEMORY_ATTRIBUTE_PRIVATE)) > return kvm_gmem_max_mapping_level(slot, gfn, max_level); > > And then have kvm_gmem_mapping_order() do: > > WARN_ON_ONCE(!kvm_slot_has_gmem(slot)); > return 0; > > > struct kvm_create_guest_memfd { > > __u64 size; > > diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig > > index 559c93ad90be..e90884f74404 100644 > > --- a/virt/kvm/Kconfig > > +++ b/virt/kvm/Kconfig > > @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE > > config HAVE_KVM_ARCH_GMEM_INVALIDATE > > bool > > depends on KVM_GMEM > > + > > +config KVM_GMEM_SHARED_MEM > > + select KVM_GMEM > > + bool > > diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c > > index 6db515833f61..06616b6b493b 100644 > > --- a/virt/kvm/guest_memfd.c > > +++ b/virt/kvm/guest_memfd.c > > @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) > > return gfn - slot->base_gfn + slot->gmem.pgoff; > > } > > > > +static bool kvm_gmem_supports_shared(struct inode *inode) > > +{ > > + const u64 flags = (u64)inode->i_private; > > + > > + if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)) > > + return false; > > + > > + return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED; > > +} > > + > > +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) > > And to my point about "shared", this is also very confusing, because there are > zero checks in here about shared vs. private. > > > +{ > > + struct inode *inode = file_inode(vmf->vma->vm_file); > > + struct folio *folio; > > + vm_fault_t ret = VM_FAULT_LOCKED; > > + > > + if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) > > + return VM_FAULT_SIGBUS; > > + > > + folio = kvm_gmem_get_folio(inode, vmf->pgoff); > > + if (IS_ERR(folio)) { > > + int err = PTR_ERR(folio); > > + > > + if (err == -EAGAIN) > > + return VM_FAULT_RETRY; > > + > > + return vmf_error(err); > > + } > > + > > + if (WARN_ON_ONCE(folio_test_large(folio))) { > > + ret = VM_FAULT_SIGBUS; > > + goto out_folio; > > + } > > + > > + if (!folio_test_uptodate(folio)) { > > + clear_highpage(folio_page(folio, 0)); > > + kvm_gmem_mark_prepared(folio); > > + } > > + > > + vmf->page = folio_file_page(folio, vmf->pgoff); > > + > > +out_folio: > > + if (ret != VM_FAULT_LOCKED) { > > + folio_unlock(folio); > > + folio_put(folio); > > + } > > + > > + return ret; > > +} > > + > > +static const struct vm_operations_struct kvm_gmem_vm_ops = { > > + .fault = kvm_gmem_fault_shared, > > +}; > > + > > +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) > > +{ > > + if (!kvm_gmem_supports_shared(file_inode(file))) > > + return -ENODEV; > > + > > + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != > > + (VM_SHARED | VM_MAYSHARE)) { > > And the SHARED terminology gets really confusing here, due to colliding with the > existing notion of SHARED file mappings. > > > + return -EINVAL; > > + } > > + > > + vma->vm_ops = &kvm_gmem_vm_ops; > > + > > + return 0; > > +} > > + > > static struct file_operations kvm_gmem_fops = { > > + .mmap = kvm_gmem_mmap, > > .open = generic_file_open, > > .release = kvm_gmem_release, > > .fallocate = kvm_gmem_fallocate, > > @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) > > u64 flags = args->flags; > > u64 valid_flags = 0; > > > > + if (kvm_arch_supports_gmem_shared_mem(kvm)) > > + valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED; > > + > > if (flags & ~valid_flags) > > return -EINVAL; > > > > -- > > 2.50.0.rc0.642.g800a2b2222-goog > >
On Mon, 16 Jun 2025 at 15:03, David Hildenbrand <david@redhat.com> wrote: > > On 16.06.25 15:44, Ira Weiny wrote: > > Sean Christopherson wrote: > >> On Wed, Jun 11, 2025, Fuad Tabba wrote: > >>> This patch enables support for shared memory in guest_memfd, including > >> > >> Please don't lead with with "This patch", simply state what changes are being > >> made as a command. > >> > >>> mapping that memory from host userspace. > >> > >>> This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option, > >>> and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED > >>> flag at creation time. > >> > >> Why? I can see that from the patch. > >> > >> This changelog is way, way, waaay too light on details. Sorry for jumping in at > >> the 11th hour, but we've spent what, 2 years working on this? > >> > >>> Reviewed-by: Gavin Shan <gshan@redhat.com> > >>> Acked-by: David Hildenbrand <david@redhat.com> > >>> Co-developed-by: Ackerley Tng <ackerleytng@google.com> > >>> Signed-off-by: Ackerley Tng <ackerleytng@google.com> > >>> Signed-off-by: Fuad Tabba <tabba@google.com> > >>> --- > >>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h > >>> index d00b85cb168c..cb19150fd595 100644 > >>> --- a/include/uapi/linux/kvm.h > >>> +++ b/include/uapi/linux/kvm.h > >>> @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes { > >>> #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) > >>> > >>> #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) > >>> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1ULL << 0) > >> > >> I find the SUPPORT_SHARED terminology to be super confusing. I had to dig quite > >> deep to undesrtand that "support shared" actually mean "userspace explicitly > >> enable sharing on _this_ guest_memfd instance". E.g. I was surprised to see > >> > >> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate. But even that is > >> weird to me. For non-CoCo VMs, there is no concept of shared vs. private. What's > >> novel and notable is that the memory is _mappable_. Yeah, yeah, pKVM's use case > >> is to share memory, but that's a _use case_, not the property of guest_memfd that > >> is being controlled by userspace. > >> > >> And kvm_gmem_memslot_supports_shared() is even worse. It's simply that the > >> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd > >> instance is the _only_ entry point to the memslot. > >> > >> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like > > > > If we are going to change this; FLAG_MAPPABLE is not clear to me either. > > The guest can map private memory, right? I see your point about shared > > being overloaded with file shared but it would not be the first time a > > term is overloaded. kvm_slot_has_gmem() does makes a lot of sense. > > > > If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE? > > If "shared" is not good enough terminology ... > > ... can we please just find a way to name what this "non-private" memory > is called? That something is mappable into $whatever is not the right > way to look at this IMHO. As raised in the past, we can easily support > read()/write()/etc to this non-private memory. > > > I'll note, the "non-private" memory in guest-memfd behaves just like ... > the "shared" memory in shmem ... well, or like other memory in memfd. > (which is based on mm/shmem.c). > > "Private" is also not the best way to describe the "protected\encrypted" > memory, but that ship has sailed with KVM_MEMORY_ATTRIBUTE_PRIVATE. > > I'll further note that in the doc of KVM_SET_USER_MEMORY_REGION2 we talk > about "private" vs "shared" memory ... so that would have to be improved > as well. To add to what David just wrote, V1 of this series used the term "mappable" [1]. After a few discussions, I thought the consensus was that "shared" was a more accurate description --- i.e., mappability was a side effect of it being shared with the host. One could argue that non-CoCo VMs have no concept of "shared" vs "private". A different way of looking at it is, non-CoCo VMs have their state as shared by default. I don't have a strong opinion. What would be good if we could agree on the terminology before I respin this. Thanks, /fuad [1] https://lore.kernel.org/all/20250122152738.1173160-4-tabba@google.com/ > -- > Cheers, > > David / dhildenb >
On 16.06.25 16:16, Fuad Tabba wrote: > On Mon, 16 Jun 2025 at 15:03, David Hildenbrand <david@redhat.com> wrote: >> >> On 16.06.25 15:44, Ira Weiny wrote: >>> Sean Christopherson wrote: >>>> On Wed, Jun 11, 2025, Fuad Tabba wrote: >>>>> This patch enables support for shared memory in guest_memfd, including >>>> >>>> Please don't lead with with "This patch", simply state what changes are being >>>> made as a command. >>>> >>>>> mapping that memory from host userspace. >>>> >>>>> This functionality is gated by the KVM_GMEM_SHARED_MEM Kconfig option, >>>>> and enabled for a given instance by the GUEST_MEMFD_FLAG_SUPPORT_SHARED >>>>> flag at creation time. >>>> >>>> Why? I can see that from the patch. >>>> >>>> This changelog is way, way, waaay too light on details. Sorry for jumping in at >>>> the 11th hour, but we've spent what, 2 years working on this? >>>> >>>>> Reviewed-by: Gavin Shan <gshan@redhat.com> >>>>> Acked-by: David Hildenbrand <david@redhat.com> >>>>> Co-developed-by: Ackerley Tng <ackerleytng@google.com> >>>>> Signed-off-by: Ackerley Tng <ackerleytng@google.com> >>>>> Signed-off-by: Fuad Tabba <tabba@google.com> >>>>> --- >>>>> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h >>>>> index d00b85cb168c..cb19150fd595 100644 >>>>> --- a/include/uapi/linux/kvm.h >>>>> +++ b/include/uapi/linux/kvm.h >>>>> @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes { >>>>> #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) >>>>> >>>>> #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) >>>>> +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1ULL << 0) >>>> >>>> I find the SUPPORT_SHARED terminology to be super confusing. I had to dig quite >>>> deep to undesrtand that "support shared" actually mean "userspace explicitly >>>> enable sharing on _this_ guest_memfd instance". E.g. I was surprised to see >>>> >>>> IMO, GUEST_MEMFD_FLAG_SHAREABLE would be more appropriate. But even that is >>>> weird to me. For non-CoCo VMs, there is no concept of shared vs. private. What's >>>> novel and notable is that the memory is _mappable_. Yeah, yeah, pKVM's use case >>>> is to share memory, but that's a _use case_, not the property of guest_memfd that >>>> is being controlled by userspace. >>>> >>>> And kvm_gmem_memslot_supports_shared() is even worse. It's simply that the >>>> memslot is bound to a mappable guest_memfd instance, it's that the guest_memfd >>>> instance is the _only_ entry point to the memslot. >>>> >>>> So my vote would be "GUEST_MEMFD_FLAG_MAPPABLE", and then something like >>> >>> If we are going to change this; FLAG_MAPPABLE is not clear to me either. >>> The guest can map private memory, right? I see your point about shared >>> being overloaded with file shared but it would not be the first time a >>> term is overloaded. kvm_slot_has_gmem() does makes a lot of sense. >>> >>> If it is going to change; how about GUEST_MEMFD_FLAG_USER_MAPPABLE? >> >> If "shared" is not good enough terminology ... >> >> ... can we please just find a way to name what this "non-private" memory >> is called? That something is mappable into $whatever is not the right >> way to look at this IMHO. As raised in the past, we can easily support >> read()/write()/etc to this non-private memory. >> >> >> I'll note, the "non-private" memory in guest-memfd behaves just like ... >> the "shared" memory in shmem ... well, or like other memory in memfd. >> (which is based on mm/shmem.c). >> >> "Private" is also not the best way to describe the "protected\encrypted" >> memory, but that ship has sailed with KVM_MEMORY_ATTRIBUTE_PRIVATE. >> >> I'll further note that in the doc of KVM_SET_USER_MEMORY_REGION2 we talk >> about "private" vs "shared" memory ... so that would have to be improved >> as well. > > To add to what David just wrote, V1 of this series used the term > "mappable" [1]. After a few discussions, I thought the consensus was > that "shared" was a more accurate description --- i.e., mappability > was a side effect of it being shared with the host. > > One could argue that non-CoCo VMs have no concept of "shared" vs > "private". A different way of looking at it is, non-CoCo VMs have > their state as shared by default. All memory of these VMs behaves similar to other memory-based shared memory backends (memfd, shmem) in the system, yes. You can map it into multiple processes and use it like shmem/memfd. I'm still thinking about another way to call non-private memory ... no success so far. "ordinary" or "generic" is .... not better.
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9a6712151a74..6b63556ca150 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -729,6 +729,19 @@ static inline bool kvm_arch_supports_gmem(struct kvm *kvm) } #endif +/* + * Returns true if this VM supports shared mem in guest_memfd. + * + * Arch code must define kvm_arch_supports_gmem_shared_mem if support for + * guest_memfd is enabled. + */ +#if !defined(kvm_arch_supports_gmem_shared_mem) +static inline bool kvm_arch_supports_gmem_shared_mem(struct kvm *kvm) +{ + return false; +} +#endif + #ifndef kvm_arch_has_readonly_mem static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d00b85cb168c..cb19150fd595 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1570,6 +1570,7 @@ struct kvm_memory_attributes { #define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3) #define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd) +#define GUEST_MEMFD_FLAG_SUPPORT_SHARED (1ULL << 0) struct kvm_create_guest_memfd { __u64 size; diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 559c93ad90be..e90884f74404 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -128,3 +128,7 @@ config HAVE_KVM_ARCH_GMEM_PREPARE config HAVE_KVM_ARCH_GMEM_INVALIDATE bool depends on KVM_GMEM + +config KVM_GMEM_SHARED_MEM + select KVM_GMEM + bool diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 6db515833f61..06616b6b493b 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -312,7 +312,77 @@ static pgoff_t kvm_gmem_get_index(struct kvm_memory_slot *slot, gfn_t gfn) return gfn - slot->base_gfn + slot->gmem.pgoff; } +static bool kvm_gmem_supports_shared(struct inode *inode) +{ + const u64 flags = (u64)inode->i_private; + + if (!IS_ENABLED(CONFIG_KVM_GMEM_SHARED_MEM)) + return false; + + return flags & GUEST_MEMFD_FLAG_SUPPORT_SHARED; +} + +static vm_fault_t kvm_gmem_fault_shared(struct vm_fault *vmf) +{ + struct inode *inode = file_inode(vmf->vma->vm_file); + struct folio *folio; + vm_fault_t ret = VM_FAULT_LOCKED; + + if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) + return VM_FAULT_SIGBUS; + + folio = kvm_gmem_get_folio(inode, vmf->pgoff); + if (IS_ERR(folio)) { + int err = PTR_ERR(folio); + + if (err == -EAGAIN) + return VM_FAULT_RETRY; + + return vmf_error(err); + } + + if (WARN_ON_ONCE(folio_test_large(folio))) { + ret = VM_FAULT_SIGBUS; + goto out_folio; + } + + if (!folio_test_uptodate(folio)) { + clear_highpage(folio_page(folio, 0)); + kvm_gmem_mark_prepared(folio); + } + + vmf->page = folio_file_page(folio, vmf->pgoff); + +out_folio: + if (ret != VM_FAULT_LOCKED) { + folio_unlock(folio); + folio_put(folio); + } + + return ret; +} + +static const struct vm_operations_struct kvm_gmem_vm_ops = { + .fault = kvm_gmem_fault_shared, +}; + +static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma) +{ + if (!kvm_gmem_supports_shared(file_inode(file))) + return -ENODEV; + + if ((vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) != + (VM_SHARED | VM_MAYSHARE)) { + return -EINVAL; + } + + vma->vm_ops = &kvm_gmem_vm_ops; + + return 0; +} + static struct file_operations kvm_gmem_fops = { + .mmap = kvm_gmem_mmap, .open = generic_file_open, .release = kvm_gmem_release, .fallocate = kvm_gmem_fallocate, @@ -463,6 +533,9 @@ int kvm_gmem_create(struct kvm *kvm, struct kvm_create_guest_memfd *args) u64 flags = args->flags; u64 valid_flags = 0; + if (kvm_arch_supports_gmem_shared_mem(kvm)) + valid_flags |= GUEST_MEMFD_FLAG_SUPPORT_SHARED; + if (flags & ~valid_flags) return -EINVAL;