Skip to content

Commit 8ffa90e

Browse files
committed
xfs: fix deadlock and streamline xfs_getfsmap performance
Refactor xfs_getfsmap to improve its performance: instead of indirectly calling a function that copies one record to userspace at a time, create a shadow buffer in the kernel and copy the whole array once at the end. On the author's computer, this reduces the runtime on his /home by ~20%. This also eliminates a deadlock when running GETFSMAP against the realtime device. The current code locks the rtbitmap to create fsmappings and copies them into userspace, having not released the rtbitmap lock. If the userspace buffer is an mmap of a sparse file that itself resides on the realtime device, the write page fault will recurse into the fs for allocation, which will deadlock on the rtbitmap lock. Fixes: 4c934c7 ("xfs: report realtime space information via the rtbitmap") Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Chandan Babu R <chandanrlinux@gmail.com>
1 parent acd1ac3 commit 8ffa90e

3 files changed

Lines changed: 124 additions & 71 deletions

File tree

fs/xfs/xfs_fsmap.c

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@
2626
#include "xfs_rtalloc.h"
2727

2828
/* Convert an xfs_fsmap to an fsmap. */
29-
void
29+
static void
3030
xfs_fsmap_from_internal(
3131
struct fsmap *dest,
3232
struct xfs_fsmap *src)
@@ -155,8 +155,7 @@ xfs_fsmap_owner_from_rmap(
155155
/* getfsmap query state */
156156
struct xfs_getfsmap_info {
157157
struct xfs_fsmap_head *head;
158-
xfs_fsmap_format_t formatter; /* formatting fn */
159-
void *format_arg; /* format buffer */
158+
struct fsmap *fsmap_recs; /* mapping records */
160159
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
161160
xfs_daddr_t next_daddr; /* next daddr we expect */
162161
u64 missing_owner; /* owner of holes */
@@ -224,6 +223,20 @@ xfs_getfsmap_is_shared(
224223
return 0;
225224
}
226225

226+
static inline void
227+
xfs_getfsmap_format(
228+
struct xfs_mount *mp,
229+
struct xfs_fsmap *xfm,
230+
struct xfs_getfsmap_info *info)
231+
{
232+
struct fsmap *rec;
233+
234+
trace_xfs_getfsmap_mapping(mp, xfm);
235+
236+
rec = &info->fsmap_recs[info->head->fmh_entries++];
237+
xfs_fsmap_from_internal(rec, xfm);
238+
}
239+
227240
/*
228241
* Format a reverse mapping for getfsmap, having translated rm_startblock
229242
* into the appropriate daddr units.
@@ -288,10 +301,7 @@ xfs_getfsmap_helper(
288301
fmr.fmr_offset = 0;
289302
fmr.fmr_length = rec_daddr - info->next_daddr;
290303
fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
291-
error = info->formatter(&fmr, info->format_arg);
292-
if (error)
293-
return error;
294-
info->head->fmh_entries++;
304+
xfs_getfsmap_format(mp, &fmr, info);
295305
}
296306

297307
if (info->last)
@@ -323,11 +333,8 @@ xfs_getfsmap_helper(
323333
if (shared)
324334
fmr.fmr_flags |= FMR_OF_SHARED;
325335
}
326-
error = info->formatter(&fmr, info->format_arg);
327-
if (error)
328-
return error;
329-
info->head->fmh_entries++;
330336

337+
xfs_getfsmap_format(mp, &fmr, info);
331338
out:
332339
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
333340
if (info->next_daddr < rec_daddr)
@@ -795,11 +802,11 @@ xfs_getfsmap_check_keys(
795802
#endif /* CONFIG_XFS_RT */
796803

797804
/*
798-
* Get filesystem's extents as described in head, and format for
799-
* output. Calls formatter to fill the user's buffer until all
800-
* extents are mapped, until the passed-in head->fmh_count slots have
801-
* been filled, or until the formatter short-circuits the loop, if it
802-
* is tracking filled-in extents on its own.
805+
* Get filesystem's extents as described in head, and format for output. Fills
806+
* in the supplied records array until there are no more reverse mappings to
807+
* return or head.fmh_entries == head.fmh_count. In the second case, this
808+
* function returns -ECANCELED to indicate that more records would have been
809+
* returned.
803810
*
804811
* Key to Confusion
805812
* ----------------
@@ -819,8 +826,7 @@ int
819826
xfs_getfsmap(
820827
struct xfs_mount *mp,
821828
struct xfs_fsmap_head *head,
822-
xfs_fsmap_format_t formatter,
823-
void *arg)
829+
struct fsmap *fsmap_recs)
824830
{
825831
struct xfs_trans *tp = NULL;
826832
struct xfs_fsmap dkeys[2]; /* per-dev keys */
@@ -895,8 +901,7 @@ xfs_getfsmap(
895901

896902
info.next_daddr = head->fmh_keys[0].fmr_physical +
897903
head->fmh_keys[0].fmr_length;
898-
info.formatter = formatter;
899-
info.format_arg = arg;
904+
info.fsmap_recs = fsmap_recs;
900905
info.head = head;
901906

902907
/*

fs/xfs/xfs_fsmap.h

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,9 @@ struct xfs_fsmap_head {
2727
struct xfs_fsmap fmh_keys[2]; /* low and high keys */
2828
};
2929

30-
void xfs_fsmap_from_internal(struct fsmap *dest, struct xfs_fsmap *src);
3130
void xfs_fsmap_to_internal(struct xfs_fsmap *dest, struct fsmap *src);
3231

33-
/* fsmap to userspace formatter - copy to user & advance pointer */
34-
typedef int (*xfs_fsmap_format_t)(struct xfs_fsmap *, void *);
35-
3632
int xfs_getfsmap(struct xfs_mount *mp, struct xfs_fsmap_head *head,
37-
xfs_fsmap_format_t formatter, void *arg);
33+
struct fsmap *out_recs);
3834

3935
#endif /* __XFS_FSMAP_H__ */

fs/xfs/xfs_ioctl.c

Lines changed: 98 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,39 +1716,17 @@ xfs_ioc_getbmap(
17161716
return error;
17171717
}
17181718

1719-
struct getfsmap_info {
1720-
struct xfs_mount *mp;
1721-
struct fsmap_head __user *data;
1722-
unsigned int idx;
1723-
__u32 last_flags;
1724-
};
1725-
1726-
STATIC int
1727-
xfs_getfsmap_format(struct xfs_fsmap *xfm, void *priv)
1728-
{
1729-
struct getfsmap_info *info = priv;
1730-
struct fsmap fm;
1731-
1732-
trace_xfs_getfsmap_mapping(info->mp, xfm);
1733-
1734-
info->last_flags = xfm->fmr_flags;
1735-
xfs_fsmap_from_internal(&fm, xfm);
1736-
if (copy_to_user(&info->data->fmh_recs[info->idx++], &fm,
1737-
sizeof(struct fsmap)))
1738-
return -EFAULT;
1739-
1740-
return 0;
1741-
}
1742-
17431719
STATIC int
17441720
xfs_ioc_getfsmap(
17451721
struct xfs_inode *ip,
17461722
struct fsmap_head __user *arg)
17471723
{
1748-
struct getfsmap_info info = { NULL };
17491724
struct xfs_fsmap_head xhead = {0};
17501725
struct fsmap_head head;
1751-
bool aborted = false;
1726+
struct fsmap *recs;
1727+
unsigned int count;
1728+
__u32 last_flags = 0;
1729+
bool done = false;
17521730
int error;
17531731

17541732
if (copy_from_user(&head, arg, sizeof(struct fsmap_head)))
@@ -1760,38 +1738,112 @@ xfs_ioc_getfsmap(
17601738
sizeof(head.fmh_keys[1].fmr_reserved)))
17611739
return -EINVAL;
17621740

1741+
/*
1742+
* Use an internal memory buffer so that we don't have to copy fsmap
1743+
* data to userspace while holding locks. Start by trying to allocate
1744+
* up to 128k for the buffer, but fall back to a single page if needed.
1745+
*/
1746+
count = min_t(unsigned int, head.fmh_count,
1747+
131072 / sizeof(struct fsmap));
1748+
recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
1749+
if (!recs) {
1750+
count = min_t(unsigned int, head.fmh_count,
1751+
PAGE_SIZE / sizeof(struct fsmap));
1752+
recs = kvzalloc(count * sizeof(struct fsmap), GFP_KERNEL);
1753+
if (!recs)
1754+
return -ENOMEM;
1755+
}
1756+
17631757
xhead.fmh_iflags = head.fmh_iflags;
1764-
xhead.fmh_count = head.fmh_count;
17651758
xfs_fsmap_to_internal(&xhead.fmh_keys[0], &head.fmh_keys[0]);
17661759
xfs_fsmap_to_internal(&xhead.fmh_keys[1], &head.fmh_keys[1]);
17671760

17681761
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
17691762
trace_xfs_getfsmap_high_key(ip->i_mount, &xhead.fmh_keys[1]);
17701763

1771-
info.mp = ip->i_mount;
1772-
info.data = arg;
1773-
error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
1774-
if (error == -ECANCELED) {
1775-
error = 0;
1776-
aborted = true;
1777-
} else if (error)
1778-
return error;
1764+
head.fmh_entries = 0;
1765+
do {
1766+
struct fsmap __user *user_recs;
1767+
struct fsmap *last_rec;
1768+
1769+
user_recs = &arg->fmh_recs[head.fmh_entries];
1770+
xhead.fmh_entries = 0;
1771+
xhead.fmh_count = min_t(unsigned int, count,
1772+
head.fmh_count - head.fmh_entries);
1773+
1774+
/* Run query, record how many entries we got. */
1775+
error = xfs_getfsmap(ip->i_mount, &xhead, recs);
1776+
switch (error) {
1777+
case 0:
1778+
/*
1779+
* There are no more records in the result set. Copy
1780+
* whatever we got to userspace and break out.
1781+
*/
1782+
done = true;
1783+
break;
1784+
case -ECANCELED:
1785+
/*
1786+
* The internal memory buffer is full. Copy whatever
1787+
* records we got to userspace and go again if we have
1788+
* not yet filled the userspace buffer.
1789+
*/
1790+
error = 0;
1791+
break;
1792+
default:
1793+
goto out_free;
1794+
}
1795+
head.fmh_entries += xhead.fmh_entries;
1796+
head.fmh_oflags = xhead.fmh_oflags;
17791797

1780-
/* If we didn't abort, set the "last" flag in the last fmx */
1781-
if (!aborted && info.idx) {
1782-
info.last_flags |= FMR_OF_LAST;
1783-
if (copy_to_user(&info.data->fmh_recs[info.idx - 1].fmr_flags,
1784-
&info.last_flags, sizeof(info.last_flags)))
1785-
return -EFAULT;
1798+
/*
1799+
* If the caller wanted a record count or there aren't any
1800+
* new records to return, we're done.
1801+
*/
1802+
if (head.fmh_count == 0 || xhead.fmh_entries == 0)
1803+
break;
1804+
1805+
/* Copy all the records we got out to userspace. */
1806+
if (copy_to_user(user_recs, recs,
1807+
xhead.fmh_entries * sizeof(struct fsmap))) {
1808+
error = -EFAULT;
1809+
goto out_free;
1810+
}
1811+
1812+
/* Remember the last record flags we copied to userspace. */
1813+
last_rec = &recs[xhead.fmh_entries - 1];
1814+
last_flags = last_rec->fmr_flags;
1815+
1816+
/* Set up the low key for the next iteration. */
1817+
xfs_fsmap_to_internal(&xhead.fmh_keys[0], last_rec);
1818+
trace_xfs_getfsmap_low_key(ip->i_mount, &xhead.fmh_keys[0]);
1819+
} while (!done && head.fmh_entries < head.fmh_count);
1820+
1821+
/*
1822+
* If there are no more records in the query result set and we're not
1823+
* in counting mode, mark the last record returned with the LAST flag.
1824+
*/
1825+
if (done && head.fmh_count > 0 && head.fmh_entries > 0) {
1826+
struct fsmap __user *user_rec;
1827+
1828+
last_flags |= FMR_OF_LAST;
1829+
user_rec = &arg->fmh_recs[head.fmh_entries - 1];
1830+
1831+
if (copy_to_user(&user_rec->fmr_flags, &last_flags,
1832+
sizeof(last_flags))) {
1833+
error = -EFAULT;
1834+
goto out_free;
1835+
}
17861836
}
17871837

17881838
/* copy back header */
1789-
head.fmh_entries = xhead.fmh_entries;
1790-
head.fmh_oflags = xhead.fmh_oflags;
1791-
if (copy_to_user(arg, &head, sizeof(struct fsmap_head)))
1792-
return -EFAULT;
1839+
if (copy_to_user(arg, &head, sizeof(struct fsmap_head))) {
1840+
error = -EFAULT;
1841+
goto out_free;
1842+
}
17931843

1794-
return 0;
1844+
out_free:
1845+
kmem_free(recs);
1846+
return error;
17951847
}
17961848

17971849
STATIC int

0 commit comments

Comments
 (0)