btrfs: handle shrink_delalloc pages calculation differently

Message ID	f17b840611935b5f58bfcdbe050a942c33b90a60.1622576697.git.josef@toxicpanda.com
State	New
Headers	show Return-Path: <stable-owner@kernel.org> From: Josef Bacik <josef@toxicpanda.com> To: linux-btrfs@vger.kernel.org, kernel-team@fb.com Cc: stable@vger.kernel.org Subject: [PATCH] btrfs: handle shrink_delalloc pages calculation differently Date: Tue, 1 Jun 2021 15:45:08 -0400 Message-Id: <f17b840611935b5f58bfcdbe050a942c33b90a60.1622576697.git.josef@toxicpanda.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	btrfs: handle shrink_delalloc pages calculation differently \| expand btrfs: handle shrink_delalloc pages calculation differently

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5d0398528a7a..20d7121225d9 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2788,11 +2788,12 @@ enum btrfs_flush_state { FLUSH_DELAYED_REFS = 4, FLUSH_DELALLOC = 5, FLUSH_DELALLOC_WAIT = 6, - ALLOC_CHUNK = 7, - ALLOC_CHUNK_FORCE = 8, - RUN_DELAYED_IPUTS = 9, - COMMIT_TRANS = 10, - FORCE_COMMIT_TRANS = 11, + FLUSH_DELALLOC_FULL = 7, + ALLOC_CHUNK = 8, + ALLOC_CHUNK_FORCE = 9, + RUN_DELAYED_IPUTS = 10, + COMMIT_TRANS = 11, + FORCE_COMMIT_TRANS = 12, }; int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 42d0fa2092d4..fc329aff478f 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -505,6 +505,10 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, long time_left; int loops; + delalloc_bytes = percpu_counter_sum_positive( + &fs_info->delalloc_bytes); + ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes); + /* Calc the number of the pages we need flush for space reservation */ if (to_reclaim == U64_MAX) { items = U64_MAX; @@ -512,19 +516,21 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, /* * to_reclaim is set to however much metadata we need to * reclaim, but reclaiming that much data doesn't really track - * exactly, so increase the amount to reclaim by 2x in order to - * make sure we're flushing enough delalloc to hopefully reclaim - * some metadata reservations. + * exactly. What we really want to do is reclaim full inode's + * worth of reservations, however that's not available to us + * here. We will take a fraction of the delalloc bytes for our + * flushing loops and hope for the best. Delalloc will expand + * the amount we write to cover an entire dirty extent, which + * will reclaim the metadata reservation for that range. If + * it's not enough subsequent flush stages will be more + * aggressive. */ + to_reclaim = max(to_reclaim, delalloc_bytes >> 3); items = calc_reclaim_items_nr(fs_info, to_reclaim) * 2; - to_reclaim = items * EXTENT_SIZE_PER_ITEM; } trans = (struct btrfs_trans_handle *)current->journal_info; - delalloc_bytes = percpu_counter_sum_positive( - &fs_info->delalloc_bytes); - ordered_bytes = percpu_counter_sum_positive(&fs_info->ordered_bytes); if (delalloc_bytes == 0 && ordered_bytes == 0) return; @@ -710,8 +716,11 @@ static void flush_space(struct btrfs_fs_info *fs_info, break; case FLUSH_DELALLOC: case FLUSH_DELALLOC_WAIT: + case FLUSH_DELALLOC_FULL: + if (state == FLUSH_DELALLOC_FULL) + num_bytes = U64_MAX; shrink_delalloc(fs_info, space_info, num_bytes, - state == FLUSH_DELALLOC_WAIT, for_preempt); + state != FLUSH_DELALLOC, for_preempt); break; case FLUSH_DELAYED_REFS_NR: case FLUSH_DELAYED_REFS: @@ -1037,6 +1046,14 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work) commit_cycles--; } + /* + * We do not want to empty the system of delalloc unless we're + * under heavy pressure, so allow one trip through the flushing + * logic before we start doing a FLUSH_DELALLOC_FULL. + */ + if (flush_state == FLUSH_DELALLOC_FULL && !commit_cycles) + flush_state++; + /* * We don't want to force a chunk allocation until we've tried * pretty hard to reclaim space. Think of the case where we @@ -1219,7 +1236,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work) * so if we now have space to allocate do the force chunk allocation. */ static const enum btrfs_flush_state data_flush_states[] = { - FLUSH_DELALLOC_WAIT, + FLUSH_DELALLOC_FULL, RUN_DELAYED_IPUTS, FLUSH_DELAYED_REFS, COMMIT_TRANS, @@ -1309,6 +1326,7 @@ static const enum btrfs_flush_state evict_flush_states[] = { FLUSH_DELAYED_REFS, FLUSH_DELALLOC, FLUSH_DELALLOC_WAIT, + FLUSH_DELALLOC_FULL, ALLOC_CHUNK, COMMIT_TRANS, }; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 76e0be7e14d0..8144b8e345b5 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -94,6 +94,7 @@ struct btrfs_space_info; EM( FLUSH_DELAYED_ITEMS, "FLUSH_DELAYED_ITEMS") \ EM( FLUSH_DELALLOC, "FLUSH_DELALLOC") \ EM( FLUSH_DELALLOC_WAIT, "FLUSH_DELALLOC_WAIT") \ + EM( FLUSH_DELALLOC_FULL, "FLUSH_DELALLOC_FULL") \ EM( FLUSH_DELAYED_REFS_NR, "FLUSH_DELAYED_REFS_NR") \ EM( FLUSH_DELAYED_REFS, "FLUSH_ELAYED_REFS") \ EM( ALLOC_CHUNK, "ALLOC_CHUNK") \

btrfs: handle shrink_delalloc pages calculation differently

Commit Message

Comments

Patch