Message ID: 20250611133330.1514028-15-tabba@google.com
State: New
Series: KVM: Mapping guest_memfd backed memory at the host for software protected VMs
On Wed, Jun 11, 2025 at 6:34 AM Fuad Tabba <tabba@google.com> wrote:
>
> Add arm64 support for handling guest page faults on guest_memfd backed
> memslots. Until guest_memfd supports huge pages, the fault granule is
> restricted to PAGE_SIZE.
>
> Reviewed-by: Gavin Shan <gshan@redhat.com>
> Signed-off-by: Fuad Tabba <tabba@google.com>

Thanks Fuad! Hopefully Oliver and/or Marc can take a look at these Arm
patches soon. :)

Feel free to add:

Reviewed-by: James Houghton <jthoughton@google.com>

> ---
>  arch/arm64/kvm/mmu.c | 82 ++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 79 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
> index 58662e0ef13e..71f8b53683e7 100644
> --- a/arch/arm64/kvm/mmu.c
> +++ b/arch/arm64/kvm/mmu.c
> @@ -1512,6 +1512,78 @@ static void adjust_nested_fault_perms(struct kvm_s2_trans *nested,
>  	*prot |= kvm_encode_nested_level(nested);
>  }
>
> +#define KVM_PGTABLE_WALK_MEMABORT_FLAGS (KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED)
> +
> +static int gmem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
> +		      struct kvm_s2_trans *nested,
> +		      struct kvm_memory_slot *memslot, bool is_perm)
> +{
> +	bool write_fault, exec_fault, writable;
> +	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS;
> +	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
> +	struct kvm_pgtable *pgt = vcpu->arch.hw_mmu->pgt;
> +	struct page *page;
> +	struct kvm *kvm = vcpu->kvm;
> +	void *memcache;
> +	kvm_pfn_t pfn;
> +	gfn_t gfn;
> +	int ret;
> +
> +	ret = prepare_mmu_memcache(vcpu, true, &memcache);
> +	if (ret)
> +		return ret;
> +
> +	if (nested)
> +		gfn = kvm_s2_trans_output(nested) >> PAGE_SHIFT;
> +	else
> +		gfn = fault_ipa >> PAGE_SHIFT;
> +
> +	write_fault = kvm_is_write_fault(vcpu);
> +	exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
> +
> +	if (write_fault && exec_fault) {
> +		kvm_err("Simultaneous write and execution fault\n");
> +		return -EFAULT;
> +	}
> +
> +	if (is_perm && !write_fault && !exec_fault) {
> +		kvm_err("Unexpected L2 read permission error\n");
> +		return -EFAULT;
> +	}
> +
> +	ret = kvm_gmem_get_pfn(kvm, memslot, gfn, &pfn, &page, NULL);
> +	if (ret) {
> +		kvm_prepare_memory_fault_exit(vcpu, fault_ipa, PAGE_SIZE,
> +					      write_fault, exec_fault, false);
> +		return ret;
> +	}
> +
> +	writable = !(memslot->flags & KVM_MEM_READONLY);
> +
> +	if (nested)
> +		adjust_nested_fault_perms(nested, &prot, &writable);
> +
> +	if (writable)
> +		prot |= KVM_PGTABLE_PROT_W;
> +
> +	if (exec_fault ||
> +	    (cpus_have_final_cap(ARM64_HAS_CACHE_DIC) &&
> +	     (!nested || kvm_s2_trans_executable(nested))))
> +		prot |= KVM_PGTABLE_PROT_X;
> +
> +	kvm_fault_lock(kvm);
> +	ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, PAGE_SIZE,
> +						 __pfn_to_phys(pfn), prot,
> +						 memcache, flags);
> +	kvm_release_faultin_page(kvm, page, !!ret, writable);
> +	kvm_fault_unlock(kvm);
> +
> +	if (writable && !ret)
> +		mark_page_dirty_in_slot(kvm, memslot, gfn);
> +
> +	return ret != -EAGAIN ? ret : 0;
> +}
> +
>  static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  			  struct kvm_s2_trans *nested,
>  			  struct kvm_memory_slot *memslot, unsigned long hva,
> @@ -1536,7 +1608,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
>  	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
>  	struct kvm_pgtable *pgt;
>  	struct page *page;
> -	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_HANDLE_FAULT | KVM_PGTABLE_WALK_SHARED;
> +	enum kvm_pgtable_walk_flags flags = KVM_PGTABLE_WALK_MEMABORT_FLAGS;
>
>  	if (fault_is_perm)
>  		fault_granule = kvm_vcpu_trap_get_perm_fault_granule(vcpu);
> @@ -1963,8 +2035,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
>  		goto out_unlock;
>  	}
>
> -	ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
> -			     esr_fsc_is_permission_fault(esr));
> +	if (kvm_slot_has_gmem(memslot))
> +		ret = gmem_abort(vcpu, fault_ipa, nested, memslot,
> +				 esr_fsc_is_permission_fault(esr));
> +	else
> +		ret = user_mem_abort(vcpu, fault_ipa, nested, memslot, hva,
> +				     esr_fsc_is_permission_fault(esr));
>  	if (ret == 0)
>  		ret = 1;
>  out:
> --
> 2.50.0.rc0.642.g800a2b2222-goog
>
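For readers following the series: the gmem_abort() path above is only reached when kvm_slot_has_gmem() is true, i.e. when userspace has bound the memslot to a guest_memfd. Below is a minimal sketch of that userspace setup using the generic guest_memfd API (KVM_CREATE_GUEST_MEMFD and KVM_SET_USER_MEMORY_REGION2); the slot number, GPA, and size are illustrative, and error handling is elided.

/* Sketch: bind a guest_memfd to a memslot so that stage-2 faults on
 * this range take the gmem_abort() path rather than user_mem_abort().
 * Assumes <linux/kvm.h> from a kernel advertising KVM_CAP_GUEST_MEMFD.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int bind_gmem_slot(int vm_fd, __u64 gpa, __u64 size)
{
	struct kvm_create_guest_memfd gmem = {
		.size = size,	/* faulted in at PAGE_SIZE granule until hugepage support lands */
	};
	struct kvm_userspace_memory_region2 region;
	int gmem_fd;

	gmem_fd = ioctl(vm_fd, KVM_CREATE_GUEST_MEMFD, &gmem);
	if (gmem_fd < 0)
		return -1;

	region = (struct kvm_userspace_memory_region2) {
		.slot			= 0,			/* illustrative slot number */
		.flags			= KVM_MEM_GUEST_MEMFD,	/* kvm_slot_has_gmem() keys off this */
		.guest_phys_addr	= gpa,
		.memory_size		= size,
		.guest_memfd		= gmem_fd,
		.guest_memfd_offset	= 0,
	};
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION2, &region);
}

With such a slot in place, a stage-2 data or instruction abort in [gpa, gpa + size) lands in kvm_handle_guest_abort(), which with this patch dispatches to gmem_abort() instead of user_mem_abort().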