diff mbox series

[RFC,v2,04/18] mm: swap: add an abstract API for locking out swapoff

Message ID 20250429233848.3093350-5-nphamcs@gmail.com
State New
Headers show
Series Virtual Swap Space | expand

Commit Message

Nhat Pham April 29, 2025, 11:38 p.m. UTC
Currently, we get a reference to the backing swap device in order to
lock out swapoff and ensure its validity. This does not make sense in
the new virtual swap design, especially after the swap backends are
decoupled - a swap entry might not have any backing swap device at all,
and its backend might change at any time during its lifetime.

In preparation for this, abstract the swapoff-lockout behavior away
behind a generic API, so callers no longer reference the swap device directly.

Signed-off-by: Nhat Pham <nphamcs@gmail.com>
---
 include/linux/swap.h | 12 ++++++++++++
 mm/memory.c          | 13 +++++++------
 mm/shmem.c           |  7 +++----
 mm/swap_state.c      | 10 ++++------
 mm/userfaultfd.c     | 11 ++++++-----
 5 files changed, 32 insertions(+), 21 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 8b8c10356a5c..23eaf44791d4 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -709,5 +709,17 @@  static inline bool mem_cgroup_swap_full(struct folio *folio)
 }
 #endif
 
+static inline bool trylock_swapoff(swp_entry_t entry,
+				struct swap_info_struct **si)
+{
+	return (*si = get_swap_device(entry)) != NULL; /* publish si for unlock_swapoff() */
+}
+
+static inline void unlock_swapoff(swp_entry_t entry,
+				struct swap_info_struct *si)
+{
+	put_swap_device(si); /* @entry unused for now; kept for decoupled swap backends */
+}
+
 #endif /* __KERNEL__*/
 #endif /* _LINUX_SWAP_H */
diff --git a/mm/memory.c b/mm/memory.c
index fb7b8dc75167..e92914df5ca7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4305,6 +4305,7 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 	struct swap_info_struct *si = NULL;
 	rmap_t rmap_flags = RMAP_NONE;
 	bool need_clear_cache = false;
+	bool swapoff_locked = false;
 	bool exclusive = false;
 	swp_entry_t entry;
 	pte_t pte;
@@ -4365,8 +4366,8 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 	}
 
 	/* Prevent swapoff from happening to us. */
-	si = get_swap_device(entry);
-	if (unlikely(!si))
+	swapoff_locked = trylock_swapoff(entry, &si);
+	if (unlikely(!swapoff_locked))
 		goto out;
 
 	folio = swap_cache_get_folio(entry, vma, vmf->address);
@@ -4713,8 +4714,8 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 		if (waitqueue_active(&swapcache_wq))
 			wake_up(&swapcache_wq);
 	}
-	if (si)
-		put_swap_device(si);
+	if (swapoff_locked)
+		unlock_swapoff(entry, si);
 	return ret;
 out_nomap:
 	if (vmf->pte)
@@ -4732,8 +4733,8 @@  vm_fault_t do_swap_page(struct vm_fault *vmf)
 		if (waitqueue_active(&swapcache_wq))
 			wake_up(&swapcache_wq);
 	}
-	if (si)
-		put_swap_device(si);
+	if (swapoff_locked)
+		unlock_swapoff(entry, si);
 	return ret;
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 1ede0800e846..8ef72dcc592e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2262,8 +2262,7 @@  static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	if (is_poisoned_swp_entry(swap))
 		return -EIO;
 
-	si = get_swap_device(swap);
-	if (!si) {
+	if (!trylock_swapoff(swap, &si)) {
 		if (!shmem_confirm_swap(mapping, index, swap))
 			return -EEXIST;
 		else
@@ -2411,7 +2410,7 @@  static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 	}
 	folio_mark_dirty(folio);
 	swap_free_nr(swap, nr_pages);
-	put_swap_device(si);
+	unlock_swapoff(swap, si);
 
 	*foliop = folio;
 	return 0;
@@ -2428,7 +2427,7 @@  static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
 		folio_unlock(folio);
 		folio_put(folio);
 	}
-	put_swap_device(si);
+	unlock_swapoff(swap, si);
 
 	return error;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index ca42b2be64d9..81f69b2df550 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -419,12 +419,11 @@  struct folio *filemap_get_incore_folio(struct address_space *mapping,
 	if (non_swap_entry(swp))
 		return ERR_PTR(-ENOENT);
 	/* Prevent swapoff from happening to us */
-	si = get_swap_device(swp);
-	if (!si)
+	if (!trylock_swapoff(swp, &si))
 		return ERR_PTR(-ENOENT);
 	index = swap_cache_index(swp);
 	folio = filemap_get_folio(swap_address_space(swp), index);
-	put_swap_device(si);
+	unlock_swapoff(swp, si);
 	return folio;
 }
 
@@ -439,8 +438,7 @@  struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	void *shadow = NULL;
 
 	*new_page_allocated = false;
-	si = get_swap_device(entry);
-	if (!si)
+	if (!trylock_swapoff(entry, &si))
 		return NULL;
 
 	for (;;) {
@@ -538,7 +536,7 @@  struct folio *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 	put_swap_folio(new_folio, entry);
 	folio_unlock(new_folio);
 put_and_return:
-	put_swap_device(si);
+	unlock_swapoff(entry, si);
 	if (!(*new_page_allocated) && new_folio)
 		folio_put(new_folio);
 	return result;
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index d06453fa8aba..f40bbfd09fd5 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1161,6 +1161,7 @@  static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
 	struct folio *src_folio = NULL;
 	struct anon_vma *src_anon_vma = NULL;
 	struct mmu_notifier_range range;
+	bool swapoff_locked = false;
 	int err = 0;
 
 	flush_cache_range(src_vma, src_addr, src_addr + PAGE_SIZE);
@@ -1367,8 +1368,8 @@  static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
 			goto out;
 		}
 
-		si = get_swap_device(entry);
-		if (unlikely(!si)) {
+		swapoff_locked = trylock_swapoff(entry, &si);
+		if (unlikely(!swapoff_locked)) {
 			err = -EAGAIN;
 			goto out;
 		}
@@ -1399,7 +1400,7 @@  static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
 				pte_unmap(src_pte);
 				pte_unmap(dst_pte);
 				src_pte = dst_pte = NULL;
-				put_swap_device(si);
+				unlock_swapoff(entry, si);
 				si = NULL;
 				/* now we can block and wait */
 				folio_lock(src_folio);
@@ -1425,8 +1426,8 @@  static int move_pages_pte(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd,
 	if (src_pte)
 		pte_unmap(src_pte);
 	mmu_notifier_invalidate_range_end(&range);
-	if (si)
-		put_swap_device(si);
+	if (swapoff_locked)
+		unlock_swapoff(entry, si);
 
 	return err;
 }