Skip to content

Commit 102c30a

Browse files
ryncsn1Naim
authored and committed
mm/vmscan: unify writeback reclaim statistic and throttling
Currently MGLRU and non-MGLRU handle the reclaim statistics and writeback
handling very differently, especially throttling: MGLRU basically just
ignored the throttling part. Unify this part, using a helper to deduplicate
the code so both setups share the same behavior.

Test using the following bash reproducer:

echo "Setup a slow device using dm delay"
dd if=/dev/zero of=/var/tmp/backing bs=1M count=2048
LOOP=$(losetup --show -f /var/tmp/backing)
mkfs.ext4 -q $LOOP
echo "0 $(blockdev --getsz $LOOP) delay $LOOP 0 0 $LOOP 0 1000" | \
    dmsetup create slow_dev
mkdir -p /mnt/slow && mount /dev/mapper/slow_dev /mnt/slow

echo "Start writeback pressure"
sync && echo 3 > /proc/sys/vm/drop_caches
mkdir /sys/fs/cgroup/test_wb
echo 128M > /sys/fs/cgroup/test_wb/memory.max
(echo $BASHPID > /sys/fs/cgroup/test_wb/cgroup.procs && \
    dd if=/dev/zero of=/mnt/slow/testfile bs=1M count=192)

echo "Clean up"
echo "0 $(blockdev --getsz $LOOP) error" | dmsetup load slow_dev
dmsetup resume slow_dev
umount -l /mnt/slow && sync
dmsetup remove slow_dev

Before this commit, `dd` gets OOM-killed immediately if MGLRU is enabled;
the classic LRU is fine. After this commit, throttling is effective and
there is no more spinning on the LRU or premature OOM. Stress tests on
other workloads also look good.

Global throttling is not handled here yet; we will fix that separately
later.

Suggested-by: Chen Ridong <chenridong@huaweicloud.com>
Tested-by: Leno Hou <lenohou@gmail.com>
Signed-off-by: Kairui Song <kasong@tencent.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
1 parent 4dcf69c commit 102c30a

1 file changed

Lines changed: 41 additions & 49 deletions

File tree

mm/vmscan.c

Lines changed: 41 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1968,6 +1968,44 @@ static int current_may_throttle(void)
19681968
return !(current->flags & PF_LOCAL_THROTTLE);
19691969
}
19701970

1971+
static void handle_reclaim_writeback(unsigned long nr_taken,
1972+
struct pglist_data *pgdat,
1973+
struct scan_control *sc,
1974+
struct reclaim_stat *stat)
1975+
{
1976+
/*
1977+
* If dirty folios are scanned that are not queued for IO, it
1978+
* implies that flushers are not doing their job. This can
1979+
* happen when memory pressure pushes dirty folios to the end of
1980+
* the LRU before the dirty limits are breached and the dirty
1981+
* data has expired. It can also happen when the proportion of
1982+
* dirty folios grows not through writes but through memory
1983+
* pressure reclaiming all the clean cache. And in some cases,
1984+
* the flushers simply cannot keep up with the allocation
1985+
* rate. Nudge the flusher threads in case they are asleep.
1986+
*/
1987+
if (stat->nr_unqueued_dirty == nr_taken && nr_taken) {
1988+
wakeup_flusher_threads(WB_REASON_VMSCAN);
1989+
/*
1990+
* For cgroupv1 dirty throttling is achieved by waking up
1991+
* the kernel flusher here and later waiting on folios
1992+
* which are in writeback to finish (see shrink_folio_list()).
1993+
*
1994+
* Flusher may not be able to issue writeback quickly
1995+
* enough for cgroupv1 writeback throttling to work
1996+
* on a large system.
1997+
*/
1998+
if (!writeback_throttling_sane(sc))
1999+
reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
2000+
}
2001+
2002+
sc->nr.dirty += stat->nr_dirty;
2003+
sc->nr.congested += stat->nr_congested;
2004+
sc->nr.writeback += stat->nr_writeback;
2005+
sc->nr.immediate += stat->nr_immediate;
2006+
sc->nr.taken += nr_taken;
2007+
}
2008+
19712009
/*
19722010
* shrink_inactive_list() is a helper for shrink_node(). It returns the number
19732011
* of reclaimed pages
@@ -2035,39 +2073,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
20352073

20362074
lru_note_cost_unlock_irq(lruvec, file, stat.nr_pageout,
20372075
nr_scanned - nr_reclaimed);
2038-
2039-
/*
2040-
* If dirty folios are scanned that are not queued for IO, it
2041-
* implies that flushers are not doing their job. This can
2042-
* happen when memory pressure pushes dirty folios to the end of
2043-
* the LRU before the dirty limits are breached and the dirty
2044-
* data has expired. It can also happen when the proportion of
2045-
* dirty folios grows not through writes but through memory
2046-
* pressure reclaiming all the clean cache. And in some cases,
2047-
* the flushers simply cannot keep up with the allocation
2048-
* rate. Nudge the flusher threads in case they are asleep.
2049-
*/
2050-
if (stat.nr_unqueued_dirty == nr_taken) {
2051-
wakeup_flusher_threads(WB_REASON_VMSCAN);
2052-
/*
2053-
* For cgroupv1 dirty throttling is achieved by waking up
2054-
* the kernel flusher here and later waiting on folios
2055-
* which are in writeback to finish (see shrink_folio_list()).
2056-
*
2057-
* Flusher may not be able to issue writeback quickly
2058-
* enough for cgroupv1 writeback throttling to work
2059-
* on a large system.
2060-
*/
2061-
if (!writeback_throttling_sane(sc))
2062-
reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
2063-
}
2064-
2065-
sc->nr.dirty += stat.nr_dirty;
2066-
sc->nr.congested += stat.nr_congested;
2067-
sc->nr.writeback += stat.nr_writeback;
2068-
sc->nr.immediate += stat.nr_immediate;
2069-
sc->nr.taken += nr_taken;
2070-
2076+
handle_reclaim_writeback(nr_taken, pgdat, sc, &stat);
20712077
trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
20722078
nr_scanned, nr_reclaimed, &stat, sc->priority, file);
20732079
return nr_reclaimed;
@@ -4688,26 +4694,11 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
46884694
retry:
46894695
reclaimed = shrink_folio_list(&list, pgdat, sc, &stat, false, memcg);
46904696
sc->nr_reclaimed += reclaimed;
4697+
handle_reclaim_writeback(isolated, pgdat, sc, &stat);
46914698
trace_mm_vmscan_lru_shrink_inactive(pgdat->node_id,
46924699
type_scanned, reclaimed, &stat, sc->priority,
46934700
type ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON);
46944701

4695-
/*
4696-
* If too many file cache in the coldest generation can't be evicted
4697-
* due to being dirty, wake up the flusher.
4698-
*/
4699-
if (stat.nr_unqueued_dirty == isolated) {
4700-
wakeup_flusher_threads(WB_REASON_VMSCAN);
4701-
4702-
/*
4703-
* For cgroupv1 dirty throttling is achieved by waking up
4704-
* the kernel flusher here and later waiting on folios
4705-
* which are in writeback to finish (see shrink_folio_list()).
4706-
*/
4707-
if (!writeback_throttling_sane(sc))
4708-
reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
4709-
}
4710-
47114702
list_for_each_entry_safe_reverse(folio, next, &list, lru) {
47124703
DEFINE_MIN_SEQ(lruvec);
47134704

@@ -4754,6 +4745,7 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
47544745

47554746
if (!list_empty(&list)) {
47564747
skip_retry = true;
4748+
isolated = 0;
47574749
goto retry;
47584750
}
47594751

0 commit comments

Comments
 (0)