Skip to content

Commit cab143a

Browse files
ryncsn authored and 1Naim committed
mm/mglru: restructure the reclaim loop
The current loop will calculate the scan number on each iteration. The number of folios to scan is based on the LRU length, with some unclear behaviors, eg, the scan number is only shifted by reclaim priority when aging is not needed or when at the default priority, and it couples the number calculation with aging and rotation. Adjust, simplify it, and decouple aging and rotation. Just calculate the scan number for once at the beginning of the reclaim, always respect the reclaim priority, and make the aging and rotation more explicit. This slightly changes how aging and offline memcg reclaim works: Previously, aging was always skipped at DEF_PRIORITY even when eviction was impossible. Now, aging is always triggered when it is necessary to make progress. The old behavior may waste a reclaim iteration only to escalate priority, potentially causing over-reclaim of slab and breaking reclaim balance in multi-cgroup setups. Similar for offline memcg. Previously, offline memcg wouldn't be aged unless it didn't have any evictable folios. Now, we might age it if it has only 3 generations and the reclaim priority is less than DEF_PRIORITY, which should be fine. On one hand, offline memcg might still hold long-term folios, and in fact, a long-existing offline memcg must be pinned by some long-term folios like shmem. These folios might be used by other memcg, so aging them as ordinary memcg seems correct. Besides, aging enables further reclaim of an offlined memcg, which will certainly happen if we keep shrinking it. And offline memcg might soon be no longer an issue with reparenting. Overall, the memcg LRU rotation, as described in mmzone.h, remains the same. Reviewed-by: Axel Rasmussen <axelrasmussen@google.com> Signed-off-by: Kairui Song <kasong@tencent.com>
1 parent 77bbf72 commit cab143a

1 file changed

Lines changed: 40 additions & 34 deletions

File tree

mm/vmscan.c

Lines changed: 40 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -4781,49 +4781,44 @@ static int evict_folios(unsigned long nr_to_scan, struct lruvec *lruvec,
47814781
}
47824782

47834783
static bool should_run_aging(struct lruvec *lruvec, unsigned long max_seq,
4784-
int swappiness, unsigned long *nr_to_scan)
4784+
struct scan_control *sc, int swappiness)
47854785
{
47864786
DEFINE_MIN_SEQ(lruvec);
47874787

4788-
*nr_to_scan = 0;
47894788
/* have to run aging, since eviction is not possible anymore */
47904789
if (evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS > max_seq)
47914790
return true;
47924791

4793-
*nr_to_scan = lruvec_evictable_size(lruvec, swappiness);
4792+
/* try to get away with not aging at the default priority */
4793+
if (sc->priority == DEF_PRIORITY)
4794+
return false;
4795+
47944796
/* better to run aging even though eviction is still possible */
47954797
return evictable_min_seq(min_seq, swappiness) + MIN_NR_GENS == max_seq;
47964798
}
47974799

4798-
/*
4799-
* For future optimizations:
4800-
* 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
4801-
* reclaim.
4802-
*/
4803-
static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc, int swappiness)
4800+
static long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *sc,
4801+
struct mem_cgroup *memcg, int swappiness)
48044802
{
4805-
bool need_aging;
4806-
unsigned long nr_to_scan;
4807-
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
4808-
DEFINE_MAX_SEQ(lruvec);
4809-
4810-
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg))
4811-
return -1;
4812-
4813-
need_aging = should_run_aging(lruvec, max_seq, swappiness, &nr_to_scan);
4803+
unsigned long evictable, nr_to_scan;
48144804

4805+
evictable = lruvec_evictable_size(lruvec, swappiness);
4806+
nr_to_scan = evictable;
48154807
/* try to scrape all its memory if this memcg was deleted */
4816-
if (nr_to_scan && !mem_cgroup_online(memcg))
4808+
if (!mem_cgroup_online(memcg))
48174809
return nr_to_scan;
48184810

48194811
nr_to_scan = apply_proportional_protection(memcg, sc, nr_to_scan);
48204812

4821-
/* try to get away with not aging at the default priority */
4822-
if (!need_aging || sc->priority == DEF_PRIORITY)
4823-
return nr_to_scan >> sc->priority;
4813+
/*
4814+
* Always respect scan priority, minimally target some folios
4815+
* to keep reclaim moving forwards.
4816+
*/
4817+
nr_to_scan >>= sc->priority;
4818+
if (!nr_to_scan)
4819+
nr_to_scan = min(evictable, SWAP_CLUSTER_MAX);
48244820

4825-
/* stop scanning this lruvec as it's low on cold folios */
4826-
return try_to_inc_max_seq(lruvec, max_seq, swappiness, false) ? -1 : 0;
4821+
return nr_to_scan;
48274822
}
48284823

48294824
static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
@@ -4857,31 +4852,43 @@ static bool should_abort_scan(struct lruvec *lruvec, struct scan_control *sc)
48574852
return true;
48584853
}
48594854

4855+
/*
4856+
* For future optimizations:
4857+
* 1. Defer try_to_inc_max_seq() to workqueues to reduce latency for memcg
4858+
* reclaim.
4859+
*/
48604860
static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
48614861
{
4862+
bool need_rotate = false;
48624863
long nr_batch, nr_to_scan;
4863-
unsigned long scanned = 0;
48644864
int swappiness = get_swappiness(lruvec, sc);
4865+
struct mem_cgroup *memcg = lruvec_memcg(lruvec);
48654866

4866-
while (true) {
4867+
nr_to_scan = get_nr_to_scan(lruvec, sc, memcg, swappiness);
4868+
while (nr_to_scan > 0) {
48674869
int delta;
4870+
DEFINE_MAX_SEQ(lruvec);
48684871

4869-
nr_to_scan = get_nr_to_scan(lruvec, sc, swappiness);
4870-
if (nr_to_scan <= 0)
4872+
if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg)) {
4873+
need_rotate = true;
48714874
break;
4875+
}
4876+
4877+
if (should_run_aging(lruvec, max_seq, sc, swappiness)) {
4878+
if (try_to_inc_max_seq(lruvec, max_seq, swappiness, false))
4879+
need_rotate = true;
4880+
break;
4881+
}
48724882

48734883
nr_batch = min(nr_to_scan, MAX_LRU_BATCH);
48744884
delta = evict_folios(nr_batch, lruvec, sc, swappiness);
48754885
if (!delta)
48764886
break;
48774887

4878-
scanned += delta;
4879-
if (scanned >= nr_to_scan)
4880-
break;
4881-
48824888
if (should_abort_scan(lruvec, sc))
48834889
break;
48844890

4891+
nr_to_scan -= delta;
48854892
cond_resched();
48864893
}
48874894

@@ -4907,8 +4914,7 @@ static bool try_to_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
49074914
reclaim_throttle(pgdat, VMSCAN_THROTTLE_WRITEBACK);
49084915
}
49094916

4910-
/* whether this lruvec should be rotated */
4911-
return nr_to_scan < 0;
4917+
return need_rotate;
49124918
}
49134919

49144920
static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)

0 commit comments

Comments (0)