@@ -709,5 +709,17 @@ static inline bool mem_cgroup_swap_full(struct folio *folio)
}
#endif
+static inline bool trylock_swapoff(swp_entry_t entry,
+ struct swap_info_struct **si)
+{
+ return (*si = get_swap_device(entry)) != NULL;
+}
+
+static inline void unlock_swapoff(swp_entry_t entry,
+ struct swap_info_struct *si)
+{
+ put_swap_device(si);
+}
+
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */
@@ -4305,6 +4305,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
struct swap_info_struct *si = NULL;
rmap_t rmap_flags = RMAP_NONE;
bool need_clear_cache = false;
+ bool swapoff_locked = false;
bool exclusive = false;
swp_entry_t entry;
pte_t pte;
@@ -4365,8 +4366,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
}
/* Prevent swapoff from happening to us. */
- si = get_swap_device(entry);
- if (unlikely(!si))
+ swapoff_locked = trylock_swapoff(entry, &si);
+ if (unlikely(!swapoff_locked))
goto out;
folio = swap_cache_get_folio(entry, vma, vmf->address);
@@ -4713,8 +4714,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (waitqueue_active(&swapcache_wq))
wake_up(&swapcache_wq);
}
- if (si)
- put_swap_device(si);
+ if (swapoff_locked)
+ unlock_swapoff(entry, si);
return ret;
out_nomap:
if (vmf->pte)
@@ -4732,8 +4733,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
if (waitqueue_active(&swapcache_wq))
wake_up(&swapcache_wq);
}
- if (si)
- put_swap_device(si);
+ if (swapoff_locked)
+ unlock_swapoff(entry, si);
return ret;
}
@@ -2262,8 +2262,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
if (is_poisoned_swp_entry(swap))
return -EIO;
- si = get_swap_device(swap);
- if (!si) {
+ if (!trylock_swapoff(swap, &si)) {
if (!shmem_confirm_swap(mapping, index, swap))
return -EEXIST;
else
@@ -2411,7 +2410,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
}
folio_mark_dirty(folio);
swap_free_nr(swap, nr_pages);
- put_swap_device(si);
+ unlock_swapoff(swap, si);
*foliop = folio;
return 0;
@@ -2428,7 +2427,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
folio_unlock(folio);
folio_put(folio);
}
- put_swap_device(si);
+ unlock_swapoff(swap, si);
return error;
}
@@ -419,12 +419,11 @@ struct folio *filemap_get_incore_folio(struct address_space *mapping,
if (non_swap_entry(swp))
return ERR_PTR(-ENOENT);
/* Prevent swapoff from happening to us */
- si = get_swap_device(swp);
- if (!si)
+ if (!trylock_swapoff(swp, &si))
return ERR_PTR(-ENOENT);
index = swap_cache_index(swp);
folio = filemap_get_folio(swap_address_space(swp), index);
- put_swap_device(si);
+ unlock_swapoff(swp, si);
return folio;
}
@@ -439,8 +438,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
void *shadow = NULL;
*new_page_allocated = false;
- si = get_swap_device(entry);
- if (!si)
+ if (!trylock_swapoff(entry, &si))
return NULL;
for (;;) {
@@ -538,7 +536,7 @@ struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
put_swap_folio(new_folio, entry);
folio_unlock(new_folio);
put_and_return:
- put_swap_device(si);
+ unlock_swapoff(entry, si);
if (!(*new_page_allocated) && new_folio)
folio_put(new_folio);
return result;
@@ -1161,6 +1161,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
struct folio *src_folio = NULL;
struct anon_vma *src_anon_vma = NULL;
struct mmu_notifier_range range;
+ bool swapoff_locked = false;
int err = 0;
flush_cache_range(src_vma, src_addr, src_addr + PAGE_SIZE);
@@ -1367,8 +1368,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
goto out;
}
- si = get_swap_device(entry);
- if (unlikely(!si)) {
+ swapoff_locked = trylock_swapoff(entry, &si);
+ if (unlikely(!swapoff_locked)) {
err = -EAGAIN;
goto out;
}
@@ -1399,7 +1400,7 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
pte_unmap(src_pte);
pte_unmap(dst_pte);
src_pte = dst_pte = NULL;
- put_swap_device(si);
+ unlock_swapoff(entry, si);
si = NULL;
/* now we can block and wait */
folio_lock(src_folio);
@@ -1425,8 +1426,8 @@ static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
if (src_pte)
pte_unmap(src_pte);
mmu_notifier_invalidate_range_end(&range);
- if (si)
- put_swap_device(si);
+ if (swapoff_locked && si)
+ unlock_swapoff(entry, si);
return err;
}
Currently, we get a reference to the backing swap device in order to
lock out swapoff and ensure the swap entry's validity. This does not
make sense in the new virtual swap design, especially once the swap
backends are decoupled - a swap entry might not have any backing swap
device at all, and its backend might change at any time during its
lifetime. In preparation for this, abstract the act of locking out
swapoff into a generic API (trylock_swapoff()/unlock_swapoff()).

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/swap.h | 12 ++++++++++++
 mm/memory.c          | 13 +++++++------
 mm/shmem.c           |  7 +++----
 mm/swap_state.c      | 10 ++++------
 mm/userfaultfd.c     | 11 ++++++-----
 5 files changed, 32 insertions(+), 21 deletions(-)
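
For reviewers, a minimal sketch of the intended calling pattern for the
new helpers, mirroring the do_swap_page() conversion above. The caller
example_swap_access() and its -EAGAIN return value are hypothetical and
only illustrate usage; they are not part of this series:

	/* Hypothetical caller, shown for illustration only. */
	static int example_swap_access(swp_entry_t entry)
	{
		struct swap_info_struct *si = NULL;

		/*
		 * Pin the entry's backend (today, its swap device) so that
		 * swapoff cannot release it underneath us. Fails if the
		 * entry is no longer backed by a valid device.
		 */
		if (!trylock_swapoff(entry, &si))
			return -EAGAIN;

		/* ... safely access swap metadata / swap cache here ... */

		unlock_swapoff(entry, si);
		return 0;
	}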