Skip to content

Commit 18f473b

Browse files
lxbsz and idryomov
authored and committed
ceph: periodically send perf metrics to MDSes
This will send the caps/read/write/metadata metrics to any available MDS once per second, the same as the userland client. It will skip the MDS sessions that don't support metric collection, because those MDSes close the socket connection when they receive a message of an unknown type.

Metric sending can be disabled via the disable_send_metrics module parameter.

[ jlayton: fix up endianness bug in ceph_mdsc_send_metrics() ]

URL: https://tracker.ceph.com/issues/43215
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
1 parent aaf5a47 commit 18f473b

7 files changed

Lines changed: 276 additions & 1 deletion

File tree

fs/ceph/mds_client.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3334,6 +3334,8 @@ static void handle_session(struct ceph_mds_session *session,
33343334
session->s_state = CEPH_MDS_SESSION_OPEN;
33353335
session->s_features = features;
33363336
renewed_caps(mdsc, session, 0);
3337+
if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
3338+
metric_schedule_delayed(&mdsc->metric);
33373339
wake = 1;
33383340
if (mdsc->stopping)
33393341
__close_session(mdsc, session);
@@ -4725,6 +4727,7 @@ void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
47254727

47264728
ceph_metric_destroy(&mdsc->metric);
47274729

4730+
flush_delayed_work(&mdsc->metric.delayed_work);
47284731
fsc->mdsc = NULL;
47294732
kfree(mdsc);
47304733
dout("mdsc_destroy %p done\n", mdsc);

fs/ceph/mds_client.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,9 @@ enum ceph_feature_type {
2828
CEPHFS_FEATURE_LAZY_CAP_WANTED,
2929
CEPHFS_FEATURE_MULTI_RECONNECT,
3030
CEPHFS_FEATURE_DELEG_INO,
31+
CEPHFS_FEATURE_METRIC_COLLECT,
3132

32-
CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_DELEG_INO,
33+
CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
3334
};
3435

3536
/*
@@ -43,6 +44,7 @@ enum ceph_feature_type {
4344
CEPHFS_FEATURE_LAZY_CAP_WANTED, \
4445
CEPHFS_FEATURE_MULTI_RECONNECT, \
4546
CEPHFS_FEATURE_DELEG_INO, \
47+
CEPHFS_FEATURE_METRIC_COLLECT, \
4648
\
4749
CEPHFS_FEATURE_MAX, \
4850
}

fs/ceph/metric.c

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,150 @@
11
/* SPDX-License-Identifier: GPL-2.0 */
2+
#include <linux/ceph/ceph_debug.h>
23

34
#include <linux/types.h>
45
#include <linux/percpu_counter.h>
56
#include <linux/math64.h>
67

78
#include "metric.h"
9+
#include "mds_client.h"
10+
11+
static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
12+
struct ceph_mds_session *s)
13+
{
14+
struct ceph_metric_head *head;
15+
struct ceph_metric_cap *cap;
16+
struct ceph_metric_read_latency *read;
17+
struct ceph_metric_write_latency *write;
18+
struct ceph_metric_metadata_latency *meta;
19+
struct ceph_client_metric *m = &mdsc->metric;
20+
u64 nr_caps = atomic64_read(&m->total_caps);
21+
struct ceph_msg *msg;
22+
struct timespec64 ts;
23+
s64 sum;
24+
s32 items = 0;
25+
s32 len;
26+
27+
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
28+
+ sizeof(*meta);
29+
30+
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
31+
if (!msg) {
32+
pr_err("send metrics to mds%d, failed to allocate message\n",
33+
s->s_mds);
34+
return false;
35+
}
36+
37+
head = msg->front.iov_base;
38+
39+
/* encode the cap metric */
40+
cap = (struct ceph_metric_cap *)(head + 1);
41+
cap->type = cpu_to_le32(CLIENT_METRIC_TYPE_CAP_INFO);
42+
cap->ver = 1;
43+
cap->compat = 1;
44+
cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
45+
cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit));
46+
cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis));
47+
cap->total = cpu_to_le64(nr_caps);
48+
items++;
49+
50+
/* encode the read latency metric */
51+
read = (struct ceph_metric_read_latency *)(cap + 1);
52+
read->type = cpu_to_le32(CLIENT_METRIC_TYPE_READ_LATENCY);
53+
read->ver = 1;
54+
read->compat = 1;
55+
read->data_len = cpu_to_le32(sizeof(*read) - 10);
56+
sum = m->read_latency_sum;
57+
jiffies_to_timespec64(sum, &ts);
58+
read->sec = cpu_to_le32(ts.tv_sec);
59+
read->nsec = cpu_to_le32(ts.tv_nsec);
60+
items++;
61+
62+
/* encode the write latency metric */
63+
write = (struct ceph_metric_write_latency *)(read + 1);
64+
write->type = cpu_to_le32(CLIENT_METRIC_TYPE_WRITE_LATENCY);
65+
write->ver = 1;
66+
write->compat = 1;
67+
write->data_len = cpu_to_le32(sizeof(*write) - 10);
68+
sum = m->write_latency_sum;
69+
jiffies_to_timespec64(sum, &ts);
70+
write->sec = cpu_to_le32(ts.tv_sec);
71+
write->nsec = cpu_to_le32(ts.tv_nsec);
72+
items++;
73+
74+
/* encode the metadata latency metric */
75+
meta = (struct ceph_metric_metadata_latency *)(write + 1);
76+
meta->type = cpu_to_le32(CLIENT_METRIC_TYPE_METADATA_LATENCY);
77+
meta->ver = 1;
78+
meta->compat = 1;
79+
meta->data_len = cpu_to_le32(sizeof(*meta) - 10);
80+
sum = m->metadata_latency_sum;
81+
jiffies_to_timespec64(sum, &ts);
82+
meta->sec = cpu_to_le32(ts.tv_sec);
83+
meta->nsec = cpu_to_le32(ts.tv_nsec);
84+
items++;
85+
86+
put_unaligned_le32(items, &head->num);
87+
msg->front.iov_len = len;
88+
msg->hdr.version = cpu_to_le16(1);
89+
msg->hdr.compat_version = cpu_to_le16(1);
90+
msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
91+
dout("client%llu send metrics to mds%d\n",
92+
ceph_client_gid(mdsc->fsc->client), s->s_mds);
93+
ceph_con_send(&s->s_con, msg);
94+
95+
return true;
96+
}
97+
98+
99+
static void metric_get_session(struct ceph_mds_client *mdsc)
100+
{
101+
struct ceph_mds_session *s;
102+
int i;
103+
104+
mutex_lock(&mdsc->mutex);
105+
for (i = 0; i < mdsc->max_sessions; i++) {
106+
s = __ceph_lookup_mds_session(mdsc, i);
107+
if (!s)
108+
continue;
109+
110+
/*
111+
* Skip it if MDS doesn't support the metric collection,
112+
* or the MDS will close the session's socket connection
113+
* directly when it get this message.
114+
*/
115+
if (check_session_state(s) &&
116+
test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &s->s_features)) {
117+
mdsc->metric.session = s;
118+
break;
119+
}
120+
121+
ceph_put_mds_session(s);
122+
}
123+
mutex_unlock(&mdsc->mutex);
124+
}
125+
126+
static void metric_delayed_work(struct work_struct *work)
127+
{
128+
struct ceph_client_metric *m =
129+
container_of(work, struct ceph_client_metric, delayed_work.work);
130+
struct ceph_mds_client *mdsc =
131+
container_of(m, struct ceph_mds_client, metric);
132+
133+
if (mdsc->stopping)
134+
return;
135+
136+
if (!m->session || !check_session_state(m->session)) {
137+
if (m->session) {
138+
ceph_put_mds_session(m->session);
139+
m->session = NULL;
140+
}
141+
metric_get_session(mdsc);
142+
}
143+
if (m->session) {
144+
ceph_mdsc_send_metrics(mdsc, m->session);
145+
metric_schedule_delayed(m);
146+
}
147+
}
8148

9149
int ceph_metric_init(struct ceph_client_metric *m)
10150
{
@@ -52,6 +192,9 @@ int ceph_metric_init(struct ceph_client_metric *m)
52192
m->total_metadatas = 0;
53193
m->metadata_latency_sum = 0;
54194

195+
m->session = NULL;
196+
INIT_DELAYED_WORK(&m->delayed_work, metric_delayed_work);
197+
55198
return 0;
56199

57200
err_i_caps_mis:
@@ -73,6 +216,11 @@ void ceph_metric_destroy(struct ceph_client_metric *m)
73216
percpu_counter_destroy(&m->i_caps_hit);
74217
percpu_counter_destroy(&m->d_lease_mis);
75218
percpu_counter_destroy(&m->d_lease_hit);
219+
220+
cancel_delayed_work_sync(&m->delayed_work);
221+
222+
if (m->session)
223+
ceph_put_mds_session(m->session);
76224
}
77225

78226
static inline void __update_latency(ktime_t *totalp, ktime_t *lsump,

fs/ceph/metric.h

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,71 @@
66
#include <linux/percpu_counter.h>
77
#include <linux/ktime.h>
88

9+
extern bool disable_send_metrics;
10+
11+
/* values stored in the 'type' field of each metric record */
enum ceph_metric_type {
	CLIENT_METRIC_TYPE_CAP_INFO = 0,
	CLIENT_METRIC_TYPE_READ_LATENCY,
	CLIENT_METRIC_TYPE_WRITE_LATENCY,
	CLIENT_METRIC_TYPE_METADATA_LATENCY,
	CLIENT_METRIC_TYPE_DENTRY_LEASE,

	/* highest defined type */
	CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
};
20+
21+
/* metric caps header */
22+
struct ceph_metric_cap {
23+
__le32 type; /* ceph metric type */
24+
25+
__u8 ver;
26+
__u8 compat;
27+
28+
__le32 data_len; /* length of sizeof(hit + mis + total) */
29+
__le64 hit;
30+
__le64 mis;
31+
__le64 total;
32+
} __packed;
33+
34+
/* metric read latency header */
35+
struct ceph_metric_read_latency {
36+
__le32 type; /* ceph metric type */
37+
38+
__u8 ver;
39+
__u8 compat;
40+
41+
__le32 data_len; /* length of sizeof(sec + nsec) */
42+
__le32 sec;
43+
__le32 nsec;
44+
} __packed;
45+
46+
/* metric write latency header */
47+
struct ceph_metric_write_latency {
48+
__le32 type; /* ceph metric type */
49+
50+
__u8 ver;
51+
__u8 compat;
52+
53+
__le32 data_len; /* length of sizeof(sec + nsec) */
54+
__le32 sec;
55+
__le32 nsec;
56+
} __packed;
57+
58+
/* metric metadata latency header */
59+
struct ceph_metric_metadata_latency {
60+
__le32 type; /* ceph metric type */
61+
62+
__u8 ver;
63+
__u8 compat;
64+
65+
__le32 data_len; /* length of sizeof(sec + nsec) */
66+
__le32 sec;
67+
__le32 nsec;
68+
} __packed;
69+
70+
struct ceph_metric_head {
71+
__le32 num; /* the number of metrics that will be sent */
72+
} __packed;
73+
974
/* This is the global metrics */
1075
struct ceph_client_metric {
1176
atomic64_t total_dentries;
@@ -36,8 +101,20 @@ struct ceph_client_metric {
36101
ktime_t metadata_latency_sq_sum;
37102
ktime_t metadata_latency_min;
38103
ktime_t metadata_latency_max;
104+
105+
struct ceph_mds_session *session;
106+
struct delayed_work delayed_work; /* delayed work */
39107
};
40108

109+
static inline void metric_schedule_delayed(struct ceph_client_metric *m)
110+
{
111+
if (disable_send_metrics)
112+
return;
113+
114+
/* per second */
115+
schedule_delayed_work(&m->delayed_work, round_jiffies_relative(HZ));
116+
}
117+
41118
extern int ceph_metric_init(struct ceph_client_metric *m);
42119
extern void ceph_metric_destroy(struct ceph_client_metric *m);
43120

fs/ceph/super.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@
2727
#include <linux/ceph/auth.h>
2828
#include <linux/ceph/debugfs.h>
2929

30+
static DEFINE_SPINLOCK(ceph_fsc_lock);
31+
static LIST_HEAD(ceph_fsc_list);
32+
3033
/*
3134
* Ceph superblock operations
3235
*
@@ -691,6 +694,10 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
691694
if (!fsc->wb_pagevec_pool)
692695
goto fail_cap_wq;
693696

697+
spin_lock(&ceph_fsc_lock);
698+
list_add_tail(&fsc->metric_wakeup, &ceph_fsc_list);
699+
spin_unlock(&ceph_fsc_lock);
700+
694701
return fsc;
695702

696703
fail_cap_wq:
@@ -717,6 +724,10 @@ static void destroy_fs_client(struct ceph_fs_client *fsc)
717724
{
718725
dout("destroy_fs_client %p\n", fsc);
719726

727+
spin_lock(&ceph_fsc_lock);
728+
list_del(&fsc->metric_wakeup);
729+
spin_unlock(&ceph_fsc_lock);
730+
720731
ceph_mdsc_destroy(fsc);
721732
destroy_workqueue(fsc->inode_wq);
722733
destroy_workqueue(fsc->cap_wq);
@@ -1282,6 +1293,37 @@ static void __exit exit_ceph(void)
12821293
destroy_caches();
12831294
}
12841295

1296+
static int param_set_metrics(const char *val, const struct kernel_param *kp)
1297+
{
1298+
struct ceph_fs_client *fsc;
1299+
int ret;
1300+
1301+
ret = param_set_bool(val, kp);
1302+
if (ret) {
1303+
pr_err("Failed to parse sending metrics switch value '%s'\n",
1304+
val);
1305+
return ret;
1306+
} else if (!disable_send_metrics) {
1307+
// wake up all the mds clients
1308+
spin_lock(&ceph_fsc_lock);
1309+
list_for_each_entry(fsc, &ceph_fsc_list, metric_wakeup) {
1310+
metric_schedule_delayed(&fsc->mdsc->metric);
1311+
}
1312+
spin_unlock(&ceph_fsc_lock);
1313+
}
1314+
1315+
return 0;
1316+
}
1317+
1318+
static const struct kernel_param_ops param_ops_metrics = {
1319+
.set = param_set_metrics,
1320+
.get = param_get_bool,
1321+
};
1322+
1323+
bool disable_send_metrics = false;
1324+
module_param_cb(disable_send_metrics, &param_ops_metrics, &disable_send_metrics, 0644);
1325+
MODULE_PARM_DESC(disable_send_metrics, "Enable sending perf metrics to ceph cluster (default: on)");
1326+
12851327
module_init(init_ceph);
12861328
module_exit(exit_ceph);
12871329

fs/ceph/super.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@ struct ceph_mount_options {
101101
struct ceph_fs_client {
102102
struct super_block *sb;
103103

104+
struct list_head metric_wakeup;
105+
104106
struct ceph_mount_options *mount_options;
105107
struct ceph_client *client;
106108

include/linux/ceph/ceph_fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ struct ceph_dir_layout {
130130
#define CEPH_MSG_CLIENT_REQUEST 24
131131
#define CEPH_MSG_CLIENT_REQUEST_FORWARD 25
132132
#define CEPH_MSG_CLIENT_REPLY 26
133+
#define CEPH_MSG_CLIENT_METRICS 29
133134
#define CEPH_MSG_CLIENT_CAPS 0x310
134135
#define CEPH_MSG_CLIENT_LEASE 0x311
135136
#define CEPH_MSG_CLIENT_SNAP 0x312

0 commit comments

Comments
 (0)