Message ID | 20230125083851.27759-1-surenb@google.com |
---|---|
Headers | show |
Series | introduce vm_flags modifier functions | expand |
On Wed 25-01-23 00:38:47, Suren Baghdasaryan wrote: > To simplify the usage of VM_LOCKED_CLEAR_MASK in clear_vm_flags(), > replace it with VM_LOCKED_MASK bitmask and convert all users. > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> Acked-by: Michal Hocko <mhocko@suse.com> > --- > include/linux/mm.h | 4 ++-- > kernel/fork.c | 2 +- > mm/hugetlb.c | 4 ++-- > mm/mlock.c | 6 +++--- > mm/mmap.c | 6 +++--- > mm/mremap.c | 2 +- > 6 files changed, 12 insertions(+), 12 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index b71f2809caac..da62bdd627bf 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -421,8 +421,8 @@ extern unsigned int kobjsize(const void *objp); > /* This mask defines which mm->def_flags a process can inherit its parent */ > #define VM_INIT_DEF_MASK VM_NOHUGEPAGE > > -/* This mask is used to clear all the VMA flags used by mlock */ > -#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) > +/* This mask represents all the VMA flag bits used by mlock */ > +#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) > > /* Arch-specific flags to clear when updating VM flags on protection change */ > #ifndef VM_ARCH_CLEAR > diff --git a/kernel/fork.c b/kernel/fork.c > index 6683c1b0f460..03d472051236 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -669,7 +669,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, > tmp->anon_vma = NULL; > } else if (anon_vma_fork(tmp, mpnt)) > goto fail_nomem_anon_vma_fork; > - tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); > + clear_vm_flags(tmp, VM_LOCKED_MASK); > file = tmp->vm_file; > if (file) { > struct address_space *mapping = file->f_mapping; > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index d20c8b09890e..4ecdbad9a451 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -6973,8 +6973,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma, > unsigned long s_end = sbase + PUD_SIZE; > > /* Allow segments to share if only one is marked 
locked */ > - unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; > - unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK; > + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED_MASK; > + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED_MASK; > > /* > * match the virtual addresses, permission and the alignment of the > diff --git a/mm/mlock.c b/mm/mlock.c > index 0336f52e03d7..5c4fff93cd6b 100644 > --- a/mm/mlock.c > +++ b/mm/mlock.c > @@ -497,7 +497,7 @@ static int apply_vma_lock_flags(unsigned long start, size_t len, > if (vma->vm_start != tmp) > return -ENOMEM; > > - newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; > + newflags = vma->vm_flags & ~VM_LOCKED_MASK; > newflags |= flags; > /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ > tmp = vma->vm_end; > @@ -661,7 +661,7 @@ static int apply_mlockall_flags(int flags) > struct vm_area_struct *vma, *prev = NULL; > vm_flags_t to_add = 0; > > - current->mm->def_flags &= VM_LOCKED_CLEAR_MASK; > + current->mm->def_flags &= ~VM_LOCKED_MASK; > if (flags & MCL_FUTURE) { > current->mm->def_flags |= VM_LOCKED; > > @@ -681,7 +681,7 @@ static int apply_mlockall_flags(int flags) > for_each_vma(vmi, vma) { > vm_flags_t newflags; > > - newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; > + newflags = vma->vm_flags & ~VM_LOCKED_MASK; > newflags |= to_add; > > /* Ignore errors */ > diff --git a/mm/mmap.c b/mm/mmap.c > index d4abc6feced1..323bd253b25a 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -2671,7 +2671,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || > is_vm_hugetlb_page(vma) || > vma == get_gate_vma(current->mm)) > - vma->vm_flags &= VM_LOCKED_CLEAR_MASK; > + clear_vm_flags(vma, VM_LOCKED_MASK); > else > mm->locked_vm += (len >> PAGE_SHIFT); > } > @@ -3340,8 +3340,8 @@ static struct vm_area_struct *__install_special_mapping( > vma->vm_start = addr; > vma->vm_end = addr + len; > > - vma->vm_flags = vm_flags | 
mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY; > - vma->vm_flags &= VM_LOCKED_CLEAR_MASK; > + init_vm_flags(vma, (vm_flags | mm->def_flags | > + VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK); > vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); > > vma->vm_ops = ops; > diff --git a/mm/mremap.c b/mm/mremap.c > index 1b3ee02bead7..35db9752cb6a 100644 > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -687,7 +687,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, > > if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) { > /* We always clear VM_LOCKED[ONFAULT] on the old vma */ > - vma->vm_flags &= VM_LOCKED_CLEAR_MASK; > + clear_vm_flags(vma, VM_LOCKED_MASK); > > /* > * anon_vma links of the old vma is no longer needed after its page > -- > 2.39.1
On Wed, Jan 25, 2023 at 1:10 AM Peter Zijlstra <peterz@infradead.org> wrote: > > On Wed, Jan 25, 2023 at 12:38:46AM -0800, Suren Baghdasaryan wrote: > > > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h > > index 2d6d790d9bed..6c7c70bf50dd 100644 > > --- a/include/linux/mm_types.h > > +++ b/include/linux/mm_types.h > > @@ -491,7 +491,13 @@ struct vm_area_struct { > > * See vmf_insert_mixed_prot() for discussion. > > */ > > pgprot_t vm_page_prot; > > - unsigned long vm_flags; /* Flags, see mm.h. */ > > + > > + /* > > + * Flags, see mm.h. > > + * WARNING! Do not modify directly. > > + * Use {init|reset|set|clear|mod}_vm_flags() functions instead. > > + */ > > + unsigned long vm_flags; > > We have __private and ACCESS_PRIVATE() to help with enforcing this. Thanks for pointing this out, Peter! I guess for that I'll need to convert all read accesses and provide get_vm_flags() too? That will cause some additional churn (a quick search shows 801 hits over 248 files) but maybe it's worth it? I think Michal suggested that too in another patch. Should I do that while we are at it? >
On Wed, Jan 25, 2023 at 1:38 AM 'Michal Hocko' via kernel-team <kernel-team@android.com> wrote: > > On Wed 25-01-23 00:38:49, Suren Baghdasaryan wrote: > > Replace indirect modifications to vma->vm_flags with calls to modifier > > functions to be able to track flag changes and to keep vma locking > > correctness. Add a BUG_ON check in ksm_madvise() to catch indirect > > vm_flags modification attempts. > > Those BUG_ONs scream too much IMHO. KSM is an MM internal code so I > guess we should be willing to trust it. Yes, but I really want to prevent an indirect misuse since it was not easy to find these. If you feel strongly about it I will remove them or if you have a better suggestion I'm all for it. > > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> > > Acked-by: Michal Hocko <mhocko@suse.com> > -- > Michal Hocko > SUSE Labs > > -- > To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@android.com. >
On Wed 25-01-23 08:57:48, Suren Baghdasaryan wrote: > On Wed, Jan 25, 2023 at 1:38 AM 'Michal Hocko' via kernel-team > <kernel-team@android.com> wrote: > > > > On Wed 25-01-23 00:38:49, Suren Baghdasaryan wrote: > > > Replace indirect modifications to vma->vm_flags with calls to modifier > > > functions to be able to track flag changes and to keep vma locking > > > correctness. Add a BUG_ON check in ksm_madvise() to catch indirect > > > vm_flags modification attempts. > > > > Those BUG_ONs scream to much IMHO. KSM is an MM internal code so I > > gueess we should be willing to trust it. > > Yes, but I really want to prevent an indirect misuse since it was not > easy to find these. If you feel strongly about it I will remove them > or if you have a better suggestion I'm all for it. You can avoid that by making flags inaccessible directly, right?
On Wed, Jan 25, 2023 at 12:38:46AM -0800, Suren Baghdasaryan wrote: > +/* Use when VMA is not part of the VMA tree and needs no locking */ > +static inline void init_vm_flags(struct vm_area_struct *vma, > + unsigned long flags) > +{ > + vma->vm_flags = flags; vm_flags are supposed to have type vm_flags_t. That's not been fully realised yet, but perhaps we could avoid making it worse? > pgprot_t vm_page_prot; > - unsigned long vm_flags; /* Flags, see mm.h. */ > + > + /* > + * Flags, see mm.h. > + * WARNING! Do not modify directly. > + * Use {init|reset|set|clear|mod}_vm_flags() functions instead. > + */ > + unsigned long vm_flags; Including changing this line to vm_flags_t
On Wed, Jan 25, 2023 at 10:37 AM Matthew Wilcox <willy@infradead.org> wrote: > > On Wed, Jan 25, 2023 at 08:49:50AM -0800, Suren Baghdasaryan wrote: > > On Wed, Jan 25, 2023 at 1:10 AM Peter Zijlstra <peterz@infradead.org> wrote: > > > > + /* > > > > + * Flags, see mm.h. > > > > + * WARNING! Do not modify directly. > > > > + * Use {init|reset|set|clear|mod}_vm_flags() functions instead. > > > > + */ > > > > + unsigned long vm_flags; > > > > > > We have __private and ACCESS_PRIVATE() to help with enforcing this. > > > > Thanks for pointing this out, Peter! I guess for that I'll need to > > convert all read accesses and provide get_vm_flags() too? That will > > cause some additional churt (a quick search shows 801 hits over 248 > > files) but maybe it's worth it? I think Michal suggested that too in > > another patch. Should I do that while we are at it? > > Here's a trick I saw somewhere in the VFS: > > union { > const vm_flags_t vm_flags; > vm_flags_t __private __vm_flags; > }; > > Now it can be read by anybody but written only by those using > ACCESS_PRIVATE. Huh, this is quite nice! I think it does not save us from the cases when vma->vm_flags is passed by a reference and modified indirectly, like in ksm_madvise()? Though maybe such usecases are so rare (I found only 2 cases) that we can ignore this?
On Wed, Jan 25, 2023 at 12:38:47AM -0800, Suren Baghdasaryan wrote: > To simplify the usage of VM_LOCKED_CLEAR_MASK in clear_vm_flags(), > replace it with VM_LOCKED_MASK bitmask and convert all users. > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> > --- > include/linux/mm.h | 4 ++-- > kernel/fork.c | 2 +- > mm/hugetlb.c | 4 ++-- > mm/mlock.c | 6 +++--- > mm/mmap.c | 6 +++--- > mm/mremap.c | 2 +- > 6 files changed, 12 insertions(+), 12 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index b71f2809caac..da62bdd627bf 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -421,8 +421,8 @@ extern unsigned int kobjsize(const void *objp); > /* This mask defines which mm->def_flags a process can inherit its parent */ > #define VM_INIT_DEF_MASK VM_NOHUGEPAGE > > -/* This mask is used to clear all the VMA flags used by mlock */ > -#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) > +/* This mask represents all the VMA flag bits used by mlock */ > +#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) > > /* Arch-specific flags to clear when updating VM flags on protection change */ > #ifndef VM_ARCH_CLEAR > diff --git a/kernel/fork.c b/kernel/fork.c > index 6683c1b0f460..03d472051236 100644 > --- a/kernel/fork.c > +++ b/kernel/fork.c > @@ -669,7 +669,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm, > tmp->anon_vma = NULL; > } else if (anon_vma_fork(tmp, mpnt)) > goto fail_nomem_anon_vma_fork; > - tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT); > + clear_vm_flags(tmp, VM_LOCKED_MASK); > file = tmp->vm_file; > if (file) { > struct address_space *mapping = file->f_mapping; > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index d20c8b09890e..4ecdbad9a451 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -6973,8 +6973,8 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma, > unsigned long s_end = sbase + PUD_SIZE; > > /* Allow segments to share 
if only one is marked locked */ > - unsigned long vm_flags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; > - unsigned long svm_flags = svma->vm_flags & VM_LOCKED_CLEAR_MASK; > + unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED_MASK; > + unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED_MASK; > > /* > * match the virtual addresses, permission and the alignment of the > diff --git a/mm/mlock.c b/mm/mlock.c > index 0336f52e03d7..5c4fff93cd6b 100644 > --- a/mm/mlock.c > +++ b/mm/mlock.c > @@ -497,7 +497,7 @@ static int apply_vma_lock_flags(unsigned long start, size_t len, > if (vma->vm_start != tmp) > return -ENOMEM; > > - newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; > + newflags = vma->vm_flags & ~VM_LOCKED_MASK; > newflags |= flags; > /* Here we know that vma->vm_start <= nstart < vma->vm_end. */ > tmp = vma->vm_end; > @@ -661,7 +661,7 @@ static int apply_mlockall_flags(int flags) > struct vm_area_struct *vma, *prev = NULL; > vm_flags_t to_add = 0; > > - current->mm->def_flags &= VM_LOCKED_CLEAR_MASK; > + current->mm->def_flags &= ~VM_LOCKED_MASK; > if (flags & MCL_FUTURE) { > current->mm->def_flags |= VM_LOCKED; > > @@ -681,7 +681,7 @@ static int apply_mlockall_flags(int flags) > for_each_vma(vmi, vma) { > vm_flags_t newflags; > > - newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; > + newflags = vma->vm_flags & ~VM_LOCKED_MASK; > newflags |= to_add; > > /* Ignore errors */ > diff --git a/mm/mmap.c b/mm/mmap.c > index d4abc6feced1..323bd253b25a 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -2671,7 +2671,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, > if ((vm_flags & VM_SPECIAL) || vma_is_dax(vma) || > is_vm_hugetlb_page(vma) || > vma == get_gate_vma(current->mm)) > - vma->vm_flags &= VM_LOCKED_CLEAR_MASK; > + clear_vm_flags(vma, VM_LOCKED_MASK); > else > mm->locked_vm += (len >> PAGE_SHIFT); > } > @@ -3340,8 +3340,8 @@ static struct vm_area_struct *__install_special_mapping( > vma->vm_start = addr; > vma->vm_end = addr + len; > > - 
vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY; > - vma->vm_flags &= VM_LOCKED_CLEAR_MASK; > + init_vm_flags(vma, (vm_flags | mm->def_flags | > + VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK); > vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); > > vma->vm_ops = ops; > diff --git a/mm/mremap.c b/mm/mremap.c > index 1b3ee02bead7..35db9752cb6a 100644 > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -687,7 +687,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, > > if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) { > /* We always clear VM_LOCKED[ONFAULT] on the old vma */ > - vma->vm_flags &= VM_LOCKED_CLEAR_MASK; > + clear_vm_flags(vma, VM_LOCKED_MASK); > > /* > * anon_vma links of the old vma is no longer needed after its page > -- > 2.39.1 > >
On Wed, Jan 25, 2023 at 12:38:49AM -0800, Suren Baghdasaryan wrote: > Replace indirect modifications to vma->vm_flags with calls to modifier > functions to be able to track flag changes and to keep vma locking > correctness. Add a BUG_ON check in ksm_madvise() to catch indirect > vm_flags modification attempts. > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> > --- > arch/powerpc/kvm/book3s_hv_uvmem.c | 5 ++++- > arch/s390/mm/gmap.c | 5 ++++- > mm/khugepaged.c | 2 ++ > mm/ksm.c | 2 ++ > 4 files changed, 12 insertions(+), 2 deletions(-) > > diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c > index 1d67baa5557a..325a7a47d348 100644 > --- a/arch/powerpc/kvm/book3s_hv_uvmem.c > +++ b/arch/powerpc/kvm/book3s_hv_uvmem.c > @@ -393,6 +393,7 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm, > { > unsigned long gfn = memslot->base_gfn; > unsigned long end, start = gfn_to_hva(kvm, gfn); > + unsigned long vm_flags; > int ret = 0; > struct vm_area_struct *vma; > int merge_flag = (merge) ? 
MADV_MERGEABLE : MADV_UNMERGEABLE; > @@ -409,12 +410,14 @@ static int kvmppc_memslot_page_merge(struct kvm *kvm, > ret = H_STATE; > break; > } > + vm_flags = vma->vm_flags; > ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, > - merge_flag, &vma->vm_flags); > + merge_flag, &vm_flags); > if (ret) { > ret = H_STATE; > break; > } > + reset_vm_flags(vma, vm_flags); > start = vma->vm_end; > } while (end > vma->vm_end); > > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > index 3a695b8a1e3c..d5eb47dcdacb 100644 > --- a/arch/s390/mm/gmap.c > +++ b/arch/s390/mm/gmap.c > @@ -2587,14 +2587,17 @@ int gmap_mark_unmergeable(void) > { > struct mm_struct *mm = current->mm; > struct vm_area_struct *vma; > + unsigned long vm_flags; > int ret; > VMA_ITERATOR(vmi, mm, 0); > > for_each_vma(vmi, vma) { > + vm_flags = vma->vm_flags; > ret = ksm_madvise(vma, vma->vm_start, vma->vm_end, > - MADV_UNMERGEABLE, &vma->vm_flags); > + MADV_UNMERGEABLE, &vm_flags); > if (ret) > return ret; > + reset_vm_flags(vma, vm_flags); > } > mm->def_flags &= ~VM_MERGEABLE; > return 0; > diff --git a/mm/khugepaged.c b/mm/khugepaged.c > index 8abc59345bf2..76b24cd0c179 100644 > --- a/mm/khugepaged.c > +++ b/mm/khugepaged.c > @@ -354,6 +354,8 @@ struct attribute_group khugepaged_attr_group = { > int hugepage_madvise(struct vm_area_struct *vma, > unsigned long *vm_flags, int advice) > { > + /* vma->vm_flags can be changed only using modifier functions */ > + BUG_ON(vm_flags == &vma->vm_flags); > switch (advice) { > case MADV_HUGEPAGE: > #ifdef CONFIG_S390 > diff --git a/mm/ksm.c b/mm/ksm.c > index 04f1c8c2df11..992b2be9f5e6 100644 > --- a/mm/ksm.c > +++ b/mm/ksm.c > @@ -2573,6 +2573,8 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start, > struct mm_struct *mm = vma->vm_mm; > int err; > > + /* vma->vm_flags can be changed only using modifier functions */ > + BUG_ON(vm_flags == &vma->vm_flags); > switch (advice) { > case MADV_MERGEABLE: > /* > -- > 2.39.1 > >
On Thu, Jan 26, 2023 at 11:17:09AM +0200, Mike Rapoport wrote: > On Wed, Jan 25, 2023 at 12:38:46AM -0800, Suren Baghdasaryan wrote: > > vm_flags are among VMA attributes which affect decisions like VMA merging > > and splitting. Therefore all vm_flags modifications are performed after > > taking exclusive mmap_lock to prevent vm_flags updates racing with such > > operations. Introduce modifier functions for vm_flags to be used whenever > > flags are updated. This way we can better check and control correct > > locking behavior during these updates. > > > > Signed-off-by: Suren Baghdasaryan <surenb@google.com> > > --- > > include/linux/mm.h | 37 +++++++++++++++++++++++++++++++++++++ > > include/linux/mm_types.h | 8 +++++++- > > 2 files changed, 44 insertions(+), 1 deletion(-) > > > > diff --git a/include/linux/mm.h b/include/linux/mm.h > > index c2f62bdce134..b71f2809caac 100644 > > --- a/include/linux/mm.h > > +++ b/include/linux/mm.h > > @@ -627,6 +627,43 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) > > INIT_LIST_HEAD(&vma->anon_vma_chain); > > } > > > > +/* Use when VMA is not part of the VMA tree and needs no locking */ > > +static inline void init_vm_flags(struct vm_area_struct *vma, > > + unsigned long flags) > > I'd suggest to make it vm_flags_init() etc. Thinking more about it, it will be even clearer to name these vma_flags_xyz() > Except that > > Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> > -- Sincerely yours, Mike.