
Commit 0c16d96

maorgottlieb authored and jgunthorpe committed
RDMA/umem: Move to allocate SG table from pages
Remove the implementation of ib_umem_add_sg_table and instead call
__sg_alloc_table_from_pages, which already has the logic to merge
contiguous pages.

Besides removing duplicated functionality, this reduces the memory
consumption of the SG table significantly. Prior to this patch, the SG
table was allocated up front with one entry per page, without regard to
page contiguity. On a system using 2MB huge pages, the table therefore
held 512 times more entries than needed. E.g. for a 100GB memory
registration:

          Number of entries      Size
Before       26214400          600.0MB
After           51200            1.2MB

Link: https://lore.kernel.org/r/20201004154340.1080481-5-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
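The table follows from simple arithmetic: with 4KB base pages, a 100GB registration pins 100GB / 4KB = 26214400 pages, one SG entry each; once physically contiguous 2MB runs are merged, only 100GB / 2MB = 51200 entries remain. A quick userspace check of the figures (assuming 24 bytes per struct scatterlist, its usual size on 64-bit builds without CONFIG_DEBUG_SG):

#include <stdio.h>

int main(void)
{
        const unsigned long long mr_size = 100ULL << 30; /* 100GB registration */
        const unsigned long long entry_sz = 24;  /* assumed sizeof(struct scatterlist) */
        unsigned long long before = mr_size >> 12; /* one entry per 4KB page */
        unsigned long long after = mr_size >> 21;  /* one entry per merged 2MB run */

        printf("Before: %llu entries, %.1fMB\n", before, before * entry_sz / 1048576.0);
        printf("After:  %llu entries, %.1fMB\n", after, after * entry_sz / 1048576.0);
        return 0;
}

This reproduces the 26214400 entries / 600.0MB and 51200 entries / 1.2MB quoted above.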
1 parent 07da122 commit 0c16d96

1 file changed: 12 additions & 82 deletions

drivers/infiniband/core/umem.c
@@ -60,73 +60,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
 	sg_free_table(&umem->sg_head);
 }
 
-/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
- *
- * sg: current scatterlist entry
- * page_list: array of npage struct page pointers
- * npages: number of pages in page_list
- * max_seg_sz: maximum segment size in bytes
- * nents: [out] number of entries in the scatterlist
- *
- * Return new end of scatterlist
- */
-static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
-						struct page **page_list,
-						unsigned long npages,
-						unsigned int max_seg_sz,
-						int *nents)
-{
-	unsigned long first_pfn;
-	unsigned long i = 0;
-	bool update_cur_sg = false;
-	bool first = !sg_page(sg);
-
-	/* Check if new page_list is contiguous with end of previous page_list.
-	 * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
-	 */
-	if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
-		       page_to_pfn(page_list[0])))
-		update_cur_sg = true;
-
-	while (i != npages) {
-		unsigned long len;
-		struct page *first_page = page_list[i];
-
-		first_pfn = page_to_pfn(first_page);
-
-		/* Compute the number of contiguous pages we have starting
-		 * at i
-		 */
-		for (len = 0; i != npages &&
-			      first_pfn + len == page_to_pfn(page_list[i]) &&
-			      len < (max_seg_sz >> PAGE_SHIFT);
-		     len++)
-			i++;
-
-		/* Squash N contiguous pages from page_list into current sge */
-		if (update_cur_sg) {
-			if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
-				sg_set_page(sg, sg_page(sg),
-					    sg->length + (len << PAGE_SHIFT),
-					    0);
-				update_cur_sg = false;
-				continue;
-			}
-			update_cur_sg = false;
-		}
-
-		/* Squash N contiguous pages into next sge or first sge */
-		if (!first)
-			sg = sg_next(sg);
-
-		(*nents)++;
-		sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
-		first = false;
-	}
-
-	return sg;
-}
-
 /**
  * ib_umem_find_best_pgsz - Find best HW page size to use for this MR
  *
@@ -201,7 +134,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	struct mm_struct *mm;
 	unsigned long npages;
 	int ret;
-	struct scatterlist *sg;
+	struct scatterlist *sg = NULL;
 	unsigned int gup_flags = FOLL_WRITE;
 
 	/*
@@ -251,15 +184,9 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
 	cur_base = addr & PAGE_MASK;
 
-	ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
-	if (ret)
-		goto vma;
-
 	if (!umem->writable)
 		gup_flags |= FOLL_FORCE;
 
-	sg = umem->sg_head.sgl;
-
 	while (npages) {
 		cond_resched();
 		ret = pin_user_pages_fast(cur_base,
@@ -271,15 +198,19 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 			goto umem_release;
 
 		cur_base += ret * PAGE_SIZE;
-		npages -= ret;
-
-		sg = ib_umem_add_sg_table(sg, page_list, ret,
-			dma_get_max_seg_size(device->dma_device),
-			&umem->sg_nents);
+		npages -= ret;
+		sg = __sg_alloc_table_from_pages(
+			&umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
+			dma_get_max_seg_size(device->dma_device), sg, npages,
+			GFP_KERNEL);
+		umem->sg_nents = umem->sg_head.nents;
+		if (IS_ERR(sg)) {
+			unpin_user_pages_dirty_lock(page_list, ret, 0);
+			ret = PTR_ERR(sg);
+			goto umem_release;
+		}
 	}
 
-	sg_mark_end(sg);
-
 	if (access & IB_ACCESS_RELAXED_ORDERING)
 		dma_attr |= DMA_ATTR_WEAK_ORDERING;
 
@@ -297,7 +228,6 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
 umem_release:
 	__ib_umem_release(device, umem, 0);
-vma:
 	atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
 out:
 	free_page((unsigned long) page_list);
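For reference, a minimal sketch of the incremental-append pattern the new code relies on. build_sgt_in_batches and its parameters are hypothetical; __sg_alloc_table_from_pages is the lib/scatterlist.c helper as of this kernel. Passing the previously returned entry as prv and the count of pages still to come as left_pages lets each batch extend the same table; the helper merges PFN-contiguous pages into entries of up to max_segment bytes and only terminates the table once left_pages reaches zero, which is why the patch could drop the explicit sg_mark_end():

#include <linux/scatterlist.h>
#include <linux/err.h>
#include <linux/mm.h>

/* Hypothetical helper: append 'total' pages to an sg_table in batches,
 * mirroring the pinning loop in ib_umem_get(). */
static int build_sgt_in_batches(struct sg_table *sgt, struct page **pages,
				unsigned long total, unsigned long batch,
				unsigned int max_segment)
{
	struct scatterlist *sg = NULL;	/* no previous entry on first call */
	unsigned long done = 0;

	while (done < total) {
		unsigned long n = min(batch, total - done);

		/* Appends after 'sg'; contiguous pages are squashed into
		 * single entries, so the table stays small. */
		sg = __sg_alloc_table_from_pages(sgt, pages + done, n, 0,
						 n << PAGE_SHIFT, max_segment,
						 sg, total - done - n,
						 GFP_KERNEL);
		if (IS_ERR(sg))
			return PTR_ERR(sg);	/* caller frees the table */
		done += n;
	}
	return 0;
}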
