Skip to content

Commit 4cfc9ec

Browse files
ryanhrob1Naim
authored and committed
mm/page_alloc: Optimize free_contig_range()
Decompose the range of order-0 pages to be freed into the set of largest possible power-of-2 size and aligned chunks and free them to the pcp or buddy. This improves on the previous approach which freed each order-0 page individually in a loop. Testing shows performance to be improved by more than 10x in some cases. Since each page is order-0, we must decrement each page's reference count individually and only consider the page for freeing as part of a high order chunk if the reference count goes to zero. Additionally free_pages_prepare() must be called for each individual order-0 page too, so that the struct page state and global accounting state can be appropriately managed. But once this is done, the resulting high order chunks can be freed as a unit to the pcp or buddy. This significantly speeds up the free operation but also has the side benefit that high order blocks are added to the pcp instead of each page ending up on the pcp order-0 list; memory remains more readily available in high orders. vmalloc will shortly become a user of this new optimized free_contig_range() since it aggressively allocates high order non-compound pages, but then calls split_page() to end up with contiguous order-0 pages. These can now be freed much more efficiently. The execution time of the following function was measured in a server class arm64 machine: static int page_alloc_high_order_test(void) { unsigned int order = HPAGE_PMD_ORDER; struct page *page; int i; for (i = 0; i < 100000; i++) { page = alloc_pages(GFP_KERNEL, order); if (!page) return -1; split_page(page, order); free_contig_range(page_to_pfn(page), 1UL << order); } return 0; } Execution time before: 4097358 usec Execution time after: 729831 usec Perf trace before: 99.63% 0.00% kthreadd [kernel.kallsyms] [.] 
kthread | ---kthread 0xffffb33c12a26af8 | |--98.13%--0xffffb33c12a26060 | | | |--97.37%--free_contig_range | | | | | |--94.93%--___free_pages | | | | | | | |--55.42%--__free_frozen_pages | | | | | | | | | --43.20%--free_frozen_page_commit | | | | | | | | | --35.37%--_raw_spin_unlock_irqrestore | | | | | | | |--11.53%--_raw_spin_trylock | | | | | | | |--8.19%--__preempt_count_dec_and_test | | | | | | | |--5.64%--_raw_spin_unlock | | | | | | | |--2.37%--__get_pfnblock_flags_mask.isra.0 | | | | | | | --1.07%--free_frozen_page_commit | | | | | --1.54%--__free_frozen_pages | | | --0.77%--___free_pages | --0.98%--0xffffb33c12a26078 alloc_pages_noprof Perf trace after: 8.42% 2.90% kthreadd [kernel.kallsyms] [k] __free_contig_range | |--5.52%--__free_contig_range | | | |--5.00%--free_prepared_contig_range | | | | | |--1.43%--__free_frozen_pages | | | | | | | --0.51%--free_frozen_page_commit | | | | | |--1.08%--_raw_spin_trylock | | | | | --0.89%--_raw_spin_unlock | | | --0.52%--free_pages_prepare | --2.90%--ret_from_fork kthread 0xffffae1c12abeaf8 0xffffae1c12abe7a0 | --2.69%--vfree __free_contig_range Acked-by: David Hildenbrand (Arm) <david@kernel.org> Acked-by: Vlastimil Babka (SUSE) <vbabka@kernel.org> Reviewed-by: Zi Yan <ziy@nvidia.com> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> Co-developed-by: Muhammad Usama Anjum <usama.anjum@arm.com> Signed-off-by: Muhammad Usama Anjum <usama.anjum@arm.com>
1 parent d1344ad commit 4cfc9ec

2 files changed

Lines changed: 110 additions & 4 deletions

File tree

include/linux/gfp.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -467,6 +467,8 @@ void free_contig_frozen_range(unsigned long pfn, unsigned long nr_pages);
467467
void free_contig_range(unsigned long pfn, unsigned long nr_pages);
468468
#endif
469469

470+
void __free_contig_range(unsigned long pfn, unsigned long nr_pages);
471+
470472
DEFINE_FREE(free_page, void *, free_page((unsigned long)_T))
471473

472474
#endif /* __LINUX_GFP_H */

mm/page_alloc.c

Lines changed: 108 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ typedef int __bitwise fpi_t;
9090
/* Free the page without taking locks. Rely on trylock only. */
9191
#define FPI_TRYLOCK ((__force fpi_t)BIT(2))
9292

93+
/* free_pages_prepare() has already been called for page(s) being freed. */
94+
#define FPI_PREPARED ((__force fpi_t)BIT(3))
95+
9396
/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
9497
static DEFINE_MUTEX(pcp_batch_high_lock);
9598
#define MIN_PERCPU_PAGELIST_HIGH_FRACTION (8)
@@ -1339,15 +1342,18 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr)
13391342

13401343
#endif /* CONFIG_MEM_ALLOC_PROFILING */
13411344

1342-
__always_inline bool __free_pages_prepare(struct page *page,
1343-
unsigned int order, fpi_t fpi_flags)
1345+
static __always_inline bool __free_pages_prepare(struct page *page,
1346+
unsigned int order, fpi_t fpi_flags)
13441347
{
13451348
int bad = 0;
13461349
bool skip_kasan_poison = should_skip_kasan_poison(page);
13471350
bool init = want_init_on_free();
13481351
bool compound = PageCompound(page);
13491352
struct folio *folio = page_folio(page);
13501353

1354+
if (fpi_flags & FPI_PREPARED)
1355+
return true;
1356+
13511357
VM_BUG_ON_PAGE(PageTail(page), page);
13521358

13531359
trace_mm_page_free(page, order);
@@ -6824,6 +6830,105 @@ void __init page_alloc_sysctl_init(void)
68246830
register_sysctl_init("vm", page_alloc_sysctl_table);
68256831
}
68266832

6833+
static void free_prepared_contig_range(struct page *page,
6834+
unsigned long nr_pages)
6835+
{
6836+
unsigned long pfn = page_to_pfn(page);
6837+
6838+
while (nr_pages) {
6839+
unsigned int order;
6840+
6841+
/* We are limited by the largest buddy order. */
6842+
order = pfn ? __ffs(pfn) : MAX_PAGE_ORDER;
6843+
/* Don't exceed the number of pages to free. */
6844+
order = min_t(unsigned int, order, ilog2(nr_pages));
6845+
order = min_t(unsigned int, order, MAX_PAGE_ORDER);
6846+
6847+
/*
6848+
* Free the chunk as a single block. Our caller has already
6849+
* called free_pages_prepare() for each order-0 page.
6850+
*/
6851+
__free_frozen_pages(page, order, FPI_PREPARED);
6852+
6853+
pfn += 1UL << order;
6854+
page += 1UL << order;
6855+
nr_pages -= 1UL << order;
6856+
}
6857+
}
6858+
6859+
static void __free_contig_range_common(unsigned long pfn, unsigned long nr_pages,
6860+
bool is_frozen)
6861+
{
6862+
struct page *page, *start = NULL;
6863+
unsigned long nr_start = 0;
6864+
unsigned long start_sec;
6865+
unsigned long i;
6866+
6867+
for (i = 0; i < nr_pages; i++) {
6868+
bool can_free = true;
6869+
6870+
/*
6871+
* Contiguous PFNs might not have contiguous "struct pages"
6872+
* in some kernel configs: page++ across a section boundary
6873+
* is undefined. Use pfn_to_page() for each PFN.
6874+
*/
6875+
page = pfn_to_page(pfn + i);
6876+
6877+
VM_WARN_ON_ONCE(PageHead(page));
6878+
VM_WARN_ON_ONCE(PageTail(page));
6879+
6880+
if (!is_frozen)
6881+
can_free = put_page_testzero(page);
6882+
6883+
if (can_free)
6884+
can_free = free_pages_prepare(page, 0);
6885+
6886+
if (!can_free) {
6887+
if (start) {
6888+
free_prepared_contig_range(start, i - nr_start);
6889+
start = NULL;
6890+
}
6891+
continue;
6892+
}
6893+
6894+
if (start && memdesc_section(page->flags) != start_sec) {
6895+
free_prepared_contig_range(start, i - nr_start);
6896+
start = page;
6897+
nr_start = i;
6898+
start_sec = memdesc_section(page->flags);
6899+
} else if (!start) {
6900+
start = page;
6901+
nr_start = i;
6902+
start_sec = memdesc_section(page->flags);
6903+
}
6904+
}
6905+
6906+
if (start)
6907+
free_prepared_contig_range(start, nr_pages - nr_start);
6908+
}
6909+
6910+
/**
6911+
* __free_contig_range - Free contiguous range of order-0 pages.
6912+
* @pfn: Page frame number of the first page in the range.
6913+
* @nr_pages: Number of pages to free.
6914+
*
6915+
* For each order-0 struct page in the physically contiguous range, put a
6916+
* reference. Free any page who's reference count falls to zero. The
6917+
* implementation is functionally equivalent to, but significantly faster than
6918+
* calling __free_page() for each struct page in a loop.
6919+
*
6920+
* Memory allocated with alloc_pages(order>=1) then subsequently split to
6921+
* order-0 with split_page() is an example of appropriate contiguous pages that
6922+
* can be freed with this API.
6923+
*
6924+
* Context: May be called in interrupt context or while holding a normal
6925+
* spinlock, but not in NMI context or while holding a raw spinlock.
6926+
*/
6927+
void __free_contig_range(unsigned long pfn, unsigned long nr_pages)
6928+
{
6929+
__free_contig_range_common(pfn, nr_pages, /* is_frozen= */ false);
6930+
}
6931+
68276932
#ifdef CONFIG_CONTIG_ALLOC
68286933
/* Usage: See admin-guide/dynamic-debug-howto.rst */
68296934
static void alloc_contig_dump_pages(struct list_head *page_list)
@@ -7370,8 +7475,7 @@ void free_contig_range(unsigned long pfn, unsigned long nr_pages)
73707475
if (WARN_ON_ONCE(PageHead(pfn_to_page(pfn))))
73717476
return;
73727477

7373-
for (; nr_pages--; pfn++)
7374-
__free_page(pfn_to_page(pfn));
7478+
__free_contig_range(pfn, nr_pages);
73757479
}
73767480
EXPORT_SYMBOL(free_contig_range);
73777481
#endif /* CONFIG_CONTIG_ALLOC */

0 commit comments

Comments
 (0)