Skip to content

Commit fad7011

Browse files
committed
Merge tag 'afs-fixes-20201016' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull afs updates from David Howells: "A collection of fixes to fix afs_cell struct refcounting, thereby fixing a slew of related syzbot bugs: - Fix the cell tree in the netns to use an rwsem rather than RCU. There seem to be some problems deriving from the use of RCU and a seqlock to walk the rbtree, but it's not entirely clear what since there are several different failures being seen. Changing things to use an rwsem instead makes it more robust. The extra performance derived from using RCU isn't necessary in this case since the only time we're looking up a cell is during mount or when cells are being manually added. - Fix the refcounting by splitting the usage counter into a memory refcount and an active users counter. The usage counter was doing double duty, keeping track of whether a cell is still in use and keeping track of when it needs to be destroyed - but this makes the clean up tricky. Separating these out simplifies the logic. - Fix purging a cell that has an alias. A cell alias pins the cell it's an alias of, but the alias is always later in the list. Trying to purge in a single pass causes rmmod to hang in such a case. - Fix cell removal. If a cell's manager is requeued whilst it's removing itself, the manager will run again and re-remove itself, causing problems in various places. Follow Hillf Danton's suggestion to insert a more terminal state that causes the manager to do nothing post-removal. In additional to the above, two other changes: - Add a tracepoint for the cell refcount and active users count. This helped with debugging the above and may be useful again in future. - Downgrade an assertion to a print when a still-active server is seen during purging. This was happening as a consequence of incomplete cell removal before the servers were cleaned up" * tag 'afs-fixes-20201016' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: afs: Don't assert on unpurgeable server records afs: Add tracing for cell refcount and active user count afs: Fix cell removal afs: Fix cell purging with aliases afs: Fix cell refcounting by splitting the usage counter afs: Fix rapid cell addition/removal by not using RCU on cells tree
2 parents 7a3dade + 7530d3e commit fad7011

12 files changed

Lines changed: 378 additions & 172 deletions

File tree

fs/afs/cell.c

Lines changed: 209 additions & 119 deletions
Large diffs are not rendered by default.

fs/afs/dynroot.c

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -123,9 +123,9 @@ static int afs_probe_cell_name(struct dentry *dentry)
123123
len--;
124124
}
125125

126-
cell = afs_lookup_cell_rcu(net, name, len);
126+
cell = afs_find_cell(net, name, len, afs_cell_trace_use_probe);
127127
if (!IS_ERR(cell)) {
128-
afs_put_cell(net, cell);
128+
afs_unuse_cell(net, cell, afs_cell_trace_unuse_probe);
129129
return 0;
130130
}
131131

@@ -179,7 +179,6 @@ static struct dentry *afs_lookup_atcell(struct dentry *dentry)
179179
struct afs_cell *cell;
180180
struct afs_net *net = afs_d2net(dentry);
181181
struct dentry *ret;
182-
unsigned int seq = 0;
183182
char *name;
184183
int len;
185184

@@ -191,17 +190,13 @@ static struct dentry *afs_lookup_atcell(struct dentry *dentry)
191190
if (!name)
192191
goto out_p;
193192

194-
rcu_read_lock();
195-
do {
196-
read_seqbegin_or_lock(&net->cells_lock, &seq);
197-
cell = rcu_dereference_raw(net->ws_cell);
198-
if (cell) {
199-
len = cell->name_len;
200-
memcpy(name, cell->name, len + 1);
201-
}
202-
} while (need_seqretry(&net->cells_lock, seq));
203-
done_seqretry(&net->cells_lock, seq);
204-
rcu_read_unlock();
193+
down_read(&net->cells_lock);
194+
cell = net->ws_cell;
195+
if (cell) {
196+
len = cell->name_len;
197+
memcpy(name, cell->name, len + 1);
198+
}
199+
up_read(&net->cells_lock);
205200

206201
ret = ERR_PTR(-ENOENT);
207202
if (!cell)

fs/afs/internal.h

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -263,11 +263,11 @@ struct afs_net {
263263

264264
/* Cell database */
265265
struct rb_root cells;
266-
struct afs_cell __rcu *ws_cell;
266+
struct afs_cell *ws_cell;
267267
struct work_struct cells_manager;
268268
struct timer_list cells_timer;
269269
atomic_t cells_outstanding;
270-
seqlock_t cells_lock;
270+
struct rw_semaphore cells_lock;
271271
struct mutex cells_alias_lock;
272272

273273
struct mutex proc_cells_lock;
@@ -326,6 +326,7 @@ enum afs_cell_state {
326326
AFS_CELL_DEACTIVATING,
327327
AFS_CELL_INACTIVE,
328328
AFS_CELL_FAILED,
329+
AFS_CELL_REMOVED,
329330
};
330331

331332
/*
@@ -363,7 +364,8 @@ struct afs_cell {
363364
#endif
364365
time64_t dns_expiry; /* Time AFSDB/SRV record expires */
365366
time64_t last_inactive; /* Time of last drop of usage count */
366-
atomic_t usage;
367+
atomic_t ref; /* Struct refcount */
368+
atomic_t active; /* Active usage counter */
367369
unsigned long flags;
368370
#define AFS_CELL_FL_NO_GC 0 /* The cell was added manually, don't auto-gc */
369371
#define AFS_CELL_FL_DO_LOOKUP 1 /* DNS lookup requested */
@@ -373,6 +375,7 @@ struct afs_cell {
373375
enum dns_record_source dns_source:8; /* Latest source of data from lookup */
374376
enum dns_lookup_status dns_status:8; /* Latest status of data from lookup */
375377
unsigned int dns_lookup_count; /* Counter of DNS lookups */
378+
unsigned int debug_id;
376379

377380
/* The volumes belonging to this cell */
378381
struct rb_root volumes; /* Tree of volumes on this server */
@@ -917,11 +920,16 @@ static inline bool afs_cb_is_broken(unsigned int cb_break,
917920
* cell.c
918921
*/
919922
extern int afs_cell_init(struct afs_net *, const char *);
920-
extern struct afs_cell *afs_lookup_cell_rcu(struct afs_net *, const char *, unsigned);
923+
extern struct afs_cell *afs_find_cell(struct afs_net *, const char *, unsigned,
924+
enum afs_cell_trace);
921925
extern struct afs_cell *afs_lookup_cell(struct afs_net *, const char *, unsigned,
922926
const char *, bool);
923-
extern struct afs_cell *afs_get_cell(struct afs_cell *);
924-
extern void afs_put_cell(struct afs_net *, struct afs_cell *);
927+
extern struct afs_cell *afs_use_cell(struct afs_cell *, enum afs_cell_trace);
928+
extern void afs_unuse_cell(struct afs_net *, struct afs_cell *, enum afs_cell_trace);
929+
extern struct afs_cell *afs_get_cell(struct afs_cell *, enum afs_cell_trace);
930+
extern void afs_see_cell(struct afs_cell *, enum afs_cell_trace);
931+
extern void afs_put_cell(struct afs_cell *, enum afs_cell_trace);
932+
extern void afs_queue_cell(struct afs_cell *, enum afs_cell_trace);
925933
extern void afs_manage_cells(struct work_struct *);
926934
extern void afs_cells_timer(struct timer_list *);
927935
extern void __net_exit afs_cell_purge(struct afs_net *);

fs/afs/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ static int __net_init afs_net_init(struct net *net_ns)
7878
mutex_init(&net->socket_mutex);
7979

8080
net->cells = RB_ROOT;
81-
seqlock_init(&net->cells_lock);
81+
init_rwsem(&net->cells_lock);
8282
INIT_WORK(&net->cells_manager, afs_manage_cells);
8383
timer_setup(&net->cells_timer, afs_cells_timer, 0);
8484

fs/afs/mntpt.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
8888
ctx->force = true;
8989
}
9090
if (ctx->cell) {
91-
afs_put_cell(ctx->net, ctx->cell);
91+
afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_mntpt);
9292
ctx->cell = NULL;
9393
}
9494
if (test_bit(AFS_VNODE_PSEUDODIR, &vnode->flags)) {
@@ -124,7 +124,7 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt)
124124
char *buf;
125125

126126
if (src_as->cell)
127-
ctx->cell = afs_get_cell(src_as->cell);
127+
ctx->cell = afs_use_cell(src_as->cell, afs_cell_trace_use_mntpt);
128128

129129
if (size < 2 || size > PAGE_SIZE - 1)
130130
return -EINVAL;

fs/afs/proc.c

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,18 +38,19 @@ static int afs_proc_cells_show(struct seq_file *m, void *v)
3838

3939
if (v == SEQ_START_TOKEN) {
4040
/* display header on line 1 */
41-
seq_puts(m, "USE TTL SV ST NAME\n");
41+
seq_puts(m, "USE ACT TTL SV ST NAME\n");
4242
return 0;
4343
}
4444

4545
cell = list_entry(v, struct afs_cell, proc_link);
4646
vllist = rcu_dereference(cell->vl_servers);
4747

4848
/* display one cell per line on subsequent lines */
49-
seq_printf(m, "%3u %6lld %2u %2u %s\n",
50-
atomic_read(&cell->usage),
49+
seq_printf(m, "%3u %3u %6lld %2u %2u %s\n",
50+
atomic_read(&cell->ref),
51+
atomic_read(&cell->active),
5152
cell->dns_expiry - ktime_get_real_seconds(),
52-
vllist->nr_servers,
53+
vllist ? vllist->nr_servers : 0,
5354
cell->state,
5455
cell->name);
5556
return 0;
@@ -128,7 +129,7 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size)
128129
}
129130

130131
if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags))
131-
afs_put_cell(net, cell);
132+
afs_unuse_cell(net, cell, afs_cell_trace_unuse_no_pin);
132133
} else {
133134
goto inval;
134135
}
@@ -154,13 +155,11 @@ static int afs_proc_rootcell_show(struct seq_file *m, void *v)
154155
struct afs_net *net;
155156

156157
net = afs_seq2net_single(m);
157-
if (rcu_access_pointer(net->ws_cell)) {
158-
rcu_read_lock();
159-
cell = rcu_dereference(net->ws_cell);
160-
if (cell)
161-
seq_printf(m, "%s\n", cell->name);
162-
rcu_read_unlock();
163-
}
158+
down_read(&net->cells_lock);
159+
cell = net->ws_cell;
160+
if (cell)
161+
seq_printf(m, "%s\n", cell->name);
162+
up_read(&net->cells_lock);
164163
return 0;
165164
}
166165

fs/afs/server.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,12 @@ void afs_manage_servers(struct work_struct *work)
550550

551551
_debug("manage %pU %u", &server->uuid, active);
552552

553-
ASSERTIFCMP(purging, active, ==, 0);
553+
if (purging) {
554+
trace_afs_server(server, atomic_read(&server->ref),
555+
active, afs_server_trace_purging);
556+
if (active != 0)
557+
pr_notice("Can't purge s=%08x\n", server->debug_id);
558+
}
554559

555560
if (active == 0) {
556561
time64_t expire_at = server->unuse_time;

fs/afs/super.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,8 @@ static int afs_parse_source(struct fs_context *fc, struct fs_parameter *param)
294294
cellnamesz, cellnamesz, cellname ?: "");
295295
return PTR_ERR(cell);
296296
}
297-
afs_put_cell(ctx->net, ctx->cell);
297+
afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_parse);
298+
afs_see_cell(cell, afs_cell_trace_see_source);
298299
ctx->cell = cell;
299300
}
300301

@@ -389,8 +390,9 @@ static int afs_validate_fc(struct fs_context *fc)
389390
_debug("switch to alias");
390391
key_put(ctx->key);
391392
ctx->key = NULL;
392-
cell = afs_get_cell(ctx->cell->alias_of);
393-
afs_put_cell(ctx->net, ctx->cell);
393+
cell = afs_use_cell(ctx->cell->alias_of,
394+
afs_cell_trace_use_fc_alias);
395+
afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
394396
ctx->cell = cell;
395397
goto reget_key;
396398
}
@@ -507,7 +509,7 @@ static struct afs_super_info *afs_alloc_sbi(struct fs_context *fc)
507509
if (ctx->dyn_root) {
508510
as->dyn_root = true;
509511
} else {
510-
as->cell = afs_get_cell(ctx->cell);
512+
as->cell = afs_use_cell(ctx->cell, afs_cell_trace_use_sbi);
511513
as->volume = afs_get_volume(ctx->volume,
512514
afs_volume_trace_get_alloc_sbi);
513515
}
@@ -520,7 +522,7 @@ static void afs_destroy_sbi(struct afs_super_info *as)
520522
if (as) {
521523
struct afs_net *net = afs_net(as->net_ns);
522524
afs_put_volume(net, as->volume, afs_volume_trace_put_destroy_sbi);
523-
afs_put_cell(net, as->cell);
525+
afs_unuse_cell(net, as->cell, afs_cell_trace_unuse_sbi);
524526
put_net(as->net_ns);
525527
kfree(as);
526528
}
@@ -606,7 +608,7 @@ static void afs_free_fc(struct fs_context *fc)
606608

607609
afs_destroy_sbi(fc->s_fs_info);
608610
afs_put_volume(ctx->net, ctx->volume, afs_volume_trace_put_free_fc);
609-
afs_put_cell(ctx->net, ctx->cell);
611+
afs_unuse_cell(ctx->net, ctx->cell, afs_cell_trace_unuse_fc);
610612
key_put(ctx->key);
611613
kfree(ctx);
612614
}
@@ -633,9 +635,7 @@ static int afs_init_fs_context(struct fs_context *fc)
633635
ctx->net = afs_net(fc->net_ns);
634636

635637
/* Default to the workstation cell. */
636-
rcu_read_lock();
637-
cell = afs_lookup_cell_rcu(ctx->net, NULL, 0);
638-
rcu_read_unlock();
638+
cell = afs_find_cell(ctx->net, NULL, 0, afs_cell_trace_use_fc);
639639
if (IS_ERR(cell))
640640
cell = NULL;
641641
ctx->cell = cell;

fs/afs/vl_alias.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ static int afs_compare_cell_roots(struct afs_cell *cell)
177177

178178
is_alias:
179179
rcu_read_unlock();
180-
cell->alias_of = afs_get_cell(p);
180+
cell->alias_of = afs_use_cell(p, afs_cell_trace_use_alias);
181181
return 1;
182182
}
183183

@@ -247,18 +247,18 @@ static int afs_query_for_alias(struct afs_cell *cell, struct key *key)
247247
continue;
248248
if (p->root_volume)
249249
continue; /* Ignore cells that have a root.cell volume. */
250-
afs_get_cell(p);
250+
afs_use_cell(p, afs_cell_trace_use_check_alias);
251251
mutex_unlock(&cell->net->proc_cells_lock);
252252

253253
if (afs_query_for_alias_one(cell, key, p) != 0)
254254
goto is_alias;
255255

256256
if (mutex_lock_interruptible(&cell->net->proc_cells_lock) < 0) {
257-
afs_put_cell(cell->net, p);
257+
afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
258258
return -ERESTARTSYS;
259259
}
260260

261-
afs_put_cell(cell->net, p);
261+
afs_unuse_cell(cell->net, p, afs_cell_trace_unuse_check_alias);
262262
}
263263

264264
mutex_unlock(&cell->net->proc_cells_lock);

fs/afs/vl_rotate.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ static bool afs_start_vl_iteration(struct afs_vl_cursor *vc)
4545
cell->dns_expiry <= ktime_get_real_seconds()) {
4646
dns_lookup_count = smp_load_acquire(&cell->dns_lookup_count);
4747
set_bit(AFS_CELL_FL_DO_LOOKUP, &cell->flags);
48-
queue_work(afs_wq, &cell->manager);
48+
afs_queue_cell(cell, afs_cell_trace_get_queue_dns);
4949

5050
if (cell->dns_source == DNS_RECORD_UNAVAILABLE) {
5151
if (wait_var_event_interruptible(

0 commit comments

Comments
 (0)