Skip to content

Commit a17a3ca

Browse files
committed
Merge tag 'for-5.10-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
"A few fixes for various warnings that accumulated over past two weeks:
- tree-checker: add missing return values for some errors
- lockdep fixes
  - when reading qgroup config and starting quota rescan
  - reverse order of quota ioctl lock and VFS freeze lock
- avoid accessing potentially stale fs info during device scan, reported by syzbot
- add scope NOFS protection around qgroup relation changes
- check for running transaction before flushing qgroups
- fix tracking of new delalloc ranges for some cases"

* tag 'for-5.10-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: fix lockdep splat when enabling and disabling qgroups
  btrfs: do nofs allocations when adding and removing qgroup relations
  btrfs: fix lockdep splat when reading qgroup config on mount
  btrfs: tree-checker: add missing returns after data_ref alignment checks
  btrfs: don't access possibly stale fs_info data for printing duplicate device
  btrfs: tree-checker: add missing return after error in root_item
  btrfs: qgroup: don't commit transaction when we already hold the handle
  btrfs: fix missing delalloc new bit for new delalloc ranges
2 parents d41e9b2 + a855fbe commit a17a3ca

7 files changed

Lines changed: 158 additions & 73 deletions

File tree

fs/btrfs/ctree.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,10 @@ struct btrfs_fs_info {
878878
*/
879879
struct ulist *qgroup_ulist;
880880

881-
/* protect user change for quota operations */
881+
/*
882+
* Protect user change for quota operations. If a transaction is needed,
883+
* it must be started before locking this lock.
884+
*/
882885
struct mutex qgroup_ioctl_lock;
883886

884887
/* list of dirty qgroups to be written at next commit */

fs/btrfs/file.c

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -452,46 +452,6 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
452452
}
453453
}
454454

455-
static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
456-
const u64 start,
457-
const u64 len,
458-
struct extent_state **cached_state)
459-
{
460-
u64 search_start = start;
461-
const u64 end = start + len - 1;
462-
463-
while (search_start < end) {
464-
const u64 search_len = end - search_start + 1;
465-
struct extent_map *em;
466-
u64 em_len;
467-
int ret = 0;
468-
469-
em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
470-
if (IS_ERR(em))
471-
return PTR_ERR(em);
472-
473-
if (em->block_start != EXTENT_MAP_HOLE)
474-
goto next;
475-
476-
em_len = em->len;
477-
if (em->start < search_start)
478-
em_len -= search_start - em->start;
479-
if (em_len > search_len)
480-
em_len = search_len;
481-
482-
ret = set_extent_bit(&inode->io_tree, search_start,
483-
search_start + em_len - 1,
484-
EXTENT_DELALLOC_NEW,
485-
NULL, cached_state, GFP_NOFS);
486-
next:
487-
search_start = extent_map_end(em);
488-
free_extent_map(em);
489-
if (ret)
490-
return ret;
491-
}
492-
return 0;
493-
}
494-
495455
/*
496456
* after copy_from_user, pages need to be dirtied and we need to make
497457
* sure holes are created between the current EOF and the start of
@@ -528,23 +488,6 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
528488
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
529489
0, 0, cached);
530490

531-
if (!btrfs_is_free_space_inode(inode)) {
532-
if (start_pos >= isize &&
533-
!(inode->flags & BTRFS_INODE_PREALLOC)) {
534-
/*
535-
* There can't be any extents following eof in this case
536-
* so just set the delalloc new bit for the range
537-
* directly.
538-
*/
539-
extra_bits |= EXTENT_DELALLOC_NEW;
540-
} else {
541-
err = btrfs_find_new_delalloc_bytes(inode, start_pos,
542-
num_bytes, cached);
543-
if (err)
544-
return err;
545-
}
546-
}
547-
548491
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
549492
extra_bits, cached);
550493
if (err)

fs/btrfs/inode.c

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2253,11 +2253,69 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
22532253
return 0;
22542254
}
22552255

2256+
static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
2257+
const u64 start,
2258+
const u64 len,
2259+
struct extent_state **cached_state)
2260+
{
2261+
u64 search_start = start;
2262+
const u64 end = start + len - 1;
2263+
2264+
while (search_start < end) {
2265+
const u64 search_len = end - search_start + 1;
2266+
struct extent_map *em;
2267+
u64 em_len;
2268+
int ret = 0;
2269+
2270+
em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
2271+
if (IS_ERR(em))
2272+
return PTR_ERR(em);
2273+
2274+
if (em->block_start != EXTENT_MAP_HOLE)
2275+
goto next;
2276+
2277+
em_len = em->len;
2278+
if (em->start < search_start)
2279+
em_len -= search_start - em->start;
2280+
if (em_len > search_len)
2281+
em_len = search_len;
2282+
2283+
ret = set_extent_bit(&inode->io_tree, search_start,
2284+
search_start + em_len - 1,
2285+
EXTENT_DELALLOC_NEW,
2286+
NULL, cached_state, GFP_NOFS);
2287+
next:
2288+
search_start = extent_map_end(em);
2289+
free_extent_map(em);
2290+
if (ret)
2291+
return ret;
2292+
}
2293+
return 0;
2294+
}
2295+
22562296
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
22572297
unsigned int extra_bits,
22582298
struct extent_state **cached_state)
22592299
{
22602300
WARN_ON(PAGE_ALIGNED(end));
2301+
2302+
if (start >= i_size_read(&inode->vfs_inode) &&
2303+
!(inode->flags & BTRFS_INODE_PREALLOC)) {
2304+
/*
2305+
* There can't be any extents following eof in this case so just
2306+
* set the delalloc new bit for the range directly.
2307+
*/
2308+
extra_bits |= EXTENT_DELALLOC_NEW;
2309+
} else {
2310+
int ret;
2311+
2312+
ret = btrfs_find_new_delalloc_bytes(inode, start,
2313+
end + 1 - start,
2314+
cached_state);
2315+
if (ret)
2316+
return ret;
2317+
}
2318+
22612319
return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
22622320
cached_state);
22632321
}

fs/btrfs/qgroup.c

Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <linux/slab.h>
1212
#include <linux/workqueue.h>
1313
#include <linux/btrfs.h>
14+
#include <linux/sched/mm.h>
1415

1516
#include "ctree.h"
1617
#include "transaction.h"
@@ -497,13 +498,13 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
497498
break;
498499
}
499500
out:
501+
btrfs_free_path(path);
500502
fs_info->qgroup_flags |= flags;
501503
if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON))
502504
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
503505
else if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN &&
504506
ret >= 0)
505507
ret = qgroup_rescan_init(fs_info, rescan_progress, 0);
506-
btrfs_free_path(path);
507508

508509
if (ret < 0) {
509510
ulist_free(fs_info->qgroup_ulist);
@@ -936,22 +937,39 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
936937
struct btrfs_key found_key;
937938
struct btrfs_qgroup *qgroup = NULL;
938939
struct btrfs_trans_handle *trans = NULL;
940+
struct ulist *ulist = NULL;
939941
int ret = 0;
940942
int slot;
941943

942944
mutex_lock(&fs_info->qgroup_ioctl_lock);
943945
if (fs_info->quota_root)
944946
goto out;
945947

946-
fs_info->qgroup_ulist = ulist_alloc(GFP_KERNEL);
947-
if (!fs_info->qgroup_ulist) {
948+
ulist = ulist_alloc(GFP_KERNEL);
949+
if (!ulist) {
948950
ret = -ENOMEM;
949951
goto out;
950952
}
951953

952954
ret = btrfs_sysfs_add_qgroups(fs_info);
953955
if (ret < 0)
954956
goto out;
957+
958+
/*
959+
* Unlock qgroup_ioctl_lock before starting the transaction. This is to
960+
* avoid lock acquisition inversion problems (reported by lockdep) between
961+
* qgroup_ioctl_lock and the vfs freeze semaphores, acquired when we
962+
* start a transaction.
963+
* After we have started the transaction, lock qgroup_ioctl_lock again and
964+
* check if someone else created the quota root in the meanwhile. If so,
965+
* just return success and release the transaction handle.
966+
*
967+
* Also we don't need to worry about someone else calling
968+
* btrfs_sysfs_add_qgroups() after we unlock and getting an error because
969+
* that function returns 0 (success) when the sysfs entries already exist.
970+
*/
971+
mutex_unlock(&fs_info->qgroup_ioctl_lock);
972+
955973
/*
956974
* 1 for quota root item
957975
* 1 for BTRFS_QGROUP_STATUS item
@@ -961,12 +979,20 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
961979
* would be a lot of overkill.
962980
*/
963981
trans = btrfs_start_transaction(tree_root, 2);
982+
983+
mutex_lock(&fs_info->qgroup_ioctl_lock);
964984
if (IS_ERR(trans)) {
965985
ret = PTR_ERR(trans);
966986
trans = NULL;
967987
goto out;
968988
}
969989

990+
if (fs_info->quota_root)
991+
goto out;
992+
993+
fs_info->qgroup_ulist = ulist;
994+
ulist = NULL;
995+
970996
/*
971997
* initially create the quota tree
972998
*/
@@ -1124,11 +1150,14 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
11241150
if (ret) {
11251151
ulist_free(fs_info->qgroup_ulist);
11261152
fs_info->qgroup_ulist = NULL;
1127-
if (trans)
1128-
btrfs_end_transaction(trans);
11291153
btrfs_sysfs_del_qgroups(fs_info);
11301154
}
11311155
mutex_unlock(&fs_info->qgroup_ioctl_lock);
1156+
if (ret && trans)
1157+
btrfs_end_transaction(trans);
1158+
else if (trans)
1159+
ret = btrfs_end_transaction(trans);
1160+
ulist_free(ulist);
11321161
return ret;
11331162
}
11341163

@@ -1141,19 +1170,29 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
11411170
mutex_lock(&fs_info->qgroup_ioctl_lock);
11421171
if (!fs_info->quota_root)
11431172
goto out;
1173+
mutex_unlock(&fs_info->qgroup_ioctl_lock);
11441174

11451175
/*
11461176
* 1 For the root item
11471177
*
11481178
* We should also reserve enough items for the quota tree deletion in
11491179
* btrfs_clean_quota_tree but this is not done.
1180+
*
1181+
* Also, we must always start a transaction without holding the mutex
1182+
* qgroup_ioctl_lock, see btrfs_quota_enable().
11501183
*/
11511184
trans = btrfs_start_transaction(fs_info->tree_root, 1);
1185+
1186+
mutex_lock(&fs_info->qgroup_ioctl_lock);
11521187
if (IS_ERR(trans)) {
11531188
ret = PTR_ERR(trans);
1189+
trans = NULL;
11541190
goto out;
11551191
}
11561192

1193+
if (!fs_info->quota_root)
1194+
goto out;
1195+
11571196
clear_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
11581197
btrfs_qgroup_wait_for_completion(fs_info, false);
11591198
spin_lock(&fs_info->qgroup_lock);
@@ -1167,13 +1206,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
11671206
ret = btrfs_clean_quota_tree(trans, quota_root);
11681207
if (ret) {
11691208
btrfs_abort_transaction(trans, ret);
1170-
goto end_trans;
1209+
goto out;
11711210
}
11721211

11731212
ret = btrfs_del_root(trans, &quota_root->root_key);
11741213
if (ret) {
11751214
btrfs_abort_transaction(trans, ret);
1176-
goto end_trans;
1215+
goto out;
11771216
}
11781217

11791218
list_del(&quota_root->dirty_list);
@@ -1185,10 +1224,13 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
11851224

11861225
btrfs_put_root(quota_root);
11871226

1188-
end_trans:
1189-
ret = btrfs_end_transaction(trans);
11901227
out:
11911228
mutex_unlock(&fs_info->qgroup_ioctl_lock);
1229+
if (ret && trans)
1230+
btrfs_end_transaction(trans);
1231+
else if (trans)
1232+
ret = btrfs_end_transaction(trans);
1233+
11921234
return ret;
11931235
}
11941236

@@ -1324,13 +1366,17 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
13241366
struct btrfs_qgroup *member;
13251367
struct btrfs_qgroup_list *list;
13261368
struct ulist *tmp;
1369+
unsigned int nofs_flag;
13271370
int ret = 0;
13281371

13291372
/* Check the level of src and dst first */
13301373
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
13311374
return -EINVAL;
13321375

1376+
/* We hold a transaction handle open, must do a NOFS allocation. */
1377+
nofs_flag = memalloc_nofs_save();
13331378
tmp = ulist_alloc(GFP_KERNEL);
1379+
memalloc_nofs_restore(nofs_flag);
13341380
if (!tmp)
13351381
return -ENOMEM;
13361382

@@ -1387,10 +1433,14 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
13871433
struct btrfs_qgroup_list *list;
13881434
struct ulist *tmp;
13891435
bool found = false;
1436+
unsigned int nofs_flag;
13901437
int ret = 0;
13911438
int ret2;
13921439

1440+
/* We hold a transaction handle open, must do a NOFS allocation. */
1441+
nofs_flag = memalloc_nofs_save();
13931442
tmp = ulist_alloc(GFP_KERNEL);
1443+
memalloc_nofs_restore(nofs_flag);
13941444
if (!tmp)
13951445
return -ENOMEM;
13961446

@@ -3512,6 +3562,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
35123562
{
35133563
struct btrfs_trans_handle *trans;
35143564
int ret;
3565+
bool can_commit = true;
35153566

35163567
/*
35173568
* We don't want to run flush again and again, so if there is a running
@@ -3523,6 +3574,20 @@ static int try_flush_qgroup(struct btrfs_root *root)
35233574
return 0;
35243575
}
35253576

3577+
/*
3578+
* If current process holds a transaction, we shouldn't flush, as we
3579+
* assume all space reservation happens before a transaction handle is
3580+
* held.
3581+
*
3582+
* But there are cases like btrfs_delayed_item_reserve_metadata() where
3583+
* we try to reserve space with one transaction handle already held.
3584+
* In that case we can't commit transaction, but at least try to end it
3585+
* and hope the started data writes can free some space.
3586+
*/
3587+
if (current->journal_info &&
3588+
current->journal_info != BTRFS_SEND_TRANS_STUB)
3589+
can_commit = false;
3590+
35263591
ret = btrfs_start_delalloc_snapshot(root);
35273592
if (ret < 0)
35283593
goto out;
@@ -3534,7 +3599,10 @@ static int try_flush_qgroup(struct btrfs_root *root)
35343599
goto out;
35353600
}
35363601

3537-
ret = btrfs_commit_transaction(trans);
3602+
if (can_commit)
3603+
ret = btrfs_commit_transaction(trans);
3604+
else
3605+
ret = btrfs_end_transaction(trans);
35383606
out:
35393607
clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
35403608
wake_up(&root->qgroup_flush_wait);

0 commit comments

Comments
 (0)