ceph daemonperf tool分析

命令格式

Ceph有个daemonperf工具,结合Ceph daemon的asok文件,可以检查Ceph各个组件的当前状态。

命令格式:ceph daemonperf <asok路径或daemon名> [interval [count]]

其中interval为输出间隔(单位秒,默认为1),count为输出次数(默认不限次数,直到被中断)。

示例如下:

# ceph daemonperf /var/run/ceph/ceph-osd.0.asok 2
---objecter--- -----------osd-----------
writ read actv|recop rd wr lat ops |
0 0 0 | 0 12k 0 0 1
0 0 0 | 0 532k 0 0 62
0 0 0 | 0 106k 0 0 10
0 0 0 | 0 487k 4.2M 1 71

# ceph daemonperf /var/run/ceph/ceph-mds.mds-ceph0.asok 2
-----mds------ --mds_server-- ---objecter--- -----mds_cache----- ---mds_log----
rlat inos caps|hsr hcs hcr |writ read actv|recd recy stry purg|segs evts subm|
0 137k 22k| 0 0 81 | 4 0 129 | 0 0 108k 4 | 54 40k 43
0 139k 15k| 0 0 7.5k|941 1 132 | 0 0 111k 918 | 46 34k 4.7k
0 145k 16k| 0 0 11k|592 0 129 | 0 0 116k 581 | 54 40k 6.3k
0 123k 7.3k| 0 1 6.9k|231 1 129 | 0 0 119k 214 | 48 33k 3.7k
0 128k 8.2k| 0 0 9.0k|509 0 130 | 0 0 123k 503 | 54 38k 5.0k
1 129k 8.3k| 0 0 924 |405 1 128 | 0 0 123k 402 | 55 39k 867

源码实现

文件:ceph.in

def main():
...
if parsed_args.admin_socket:
sockpath = parsed_args.admin_socket
elif len(childargs) > 0 and childargs[0] in ["daemon", "daemonperf"]:
daemon_perf = (childargs[0] == "daemonperf")
# Treat "daemon <path>" or "daemon <name>" like --admin_daemon <path>
# Handle "daemonperf <path>" the same but requires no trailing args
require_args = 2 if daemon_perf else 3
...
if sockpath and daemon_perf:
interval = 1
count = None
if len(childargs) > 0:
try:
interval = float(childargs[0])
if interval < 0:
raise ValueError
except ValueError:
print('daemonperf: interval should be a positive number', file=sys.stderr)
return errno.EINVAL
if len(childargs) > 1:
if not childargs[1].isdigit():
print('daemonperf: count should be a positive integer', file=sys.stderr)
return errno.EINVAL
count = int(childargs[1])
DaemonWatcher(sockpath).run(interval, count)
return 0
...

文件:pybind/ceph_daemon.py

class DaemonWatcher(object):
"""
Given a Ceph daemon's admin socket path, poll its performance counters
and output a series of output lines showing the momentary values of
counters of interest (those with the 'nick' property in Ceph's schema)
"""
...
def _load_schema(self):
"""
Populate our instance-local copy of the daemon's performance counter
schema, and work out which stats we will display.
"""
self._schema = json.loads(admin_socket(self.asok_path, ["perf", "schema"]))

# Build list of which stats we will display, based on which
# stats have a nickname
self._stats = defaultdict(dict)
for section_name, section_stats in self._schema.items():
for name, schema_data in section_stats.items():
if schema_data.get('nick'):
self._stats[section_name][name] = schema_data['nick']

def run(self, interval, count=None, ostr=sys.stdout):
"""
Print output at regular intervals until interrupted.

:param ostr: Stream to which to send output
"""

self._load_schema()
self._colored = self.supports_color(ostr)

self._print_headers(ostr)

last_dump = json.loads(admin_socket(self.asok_path, ["perf", "dump"]))
rows_since_header = 0
term_height = 25

try:
while True:
dump = json.loads(admin_socket(self.asok_path, ["perf", "dump"]))
if rows_since_header > term_height - 2:
self._print_headers(ostr)
rows_since_header = 0
self._print_vals(ostr, dump, last_dump)
if count is not None:
count -= 1
if count <= 0:
break
rows_since_header += 1
last_dump = dump
time.sleep(interval)
except KeyboardInterrupt:
return

所以ceph daemonperf <asok>命令的输出是根据ceph daemon <asok> perf dump/schema的输出整理的。

实现中保存了上一次perf dump的结果(last_dump),每次输出时把本次dump和上一次dump一起交给_print_vals处理,所以这里输出的是每个interval内的统计数据。

以ceph mds asok的perf dump/schema输出为例,看看每个项是什么含义:

ceph daemon <mds-asok> perf schema
{
"mds": {
"reply_latency": {
"type": 5,
"description": "Reply latency",
"nick": "rlat"
},
...
"inodes": {
"type": 2,
"description": "Inodes",
"nick": "inos"
},
...
"caps": {
"type": 2,
"description": "Capabilities",
"nick": "caps"
},
...
},
"mds_cache": {
"num_strays": {
"type": 2,
"description": "Stray dentries",
"nick": "stry"
},
...
"strays_purged": {
"type": 10,
"description": "Stray dentries purged",
"nick": "purg"
},
...
"num_recovering_enqueued": {
"type": 2,
"description": "Files waiting for recovery",
"nick": "recy"
},
...
"recovery_completed": {
"type": 10,
"description": "File recoveries completed",
"nick": "recd"
}
},
"mds_log": {
"evadd": {
"type": 10,
"description": "Events submitted",
"nick": "subm"
},
...
"ev": {
"type": 2,
"description": "Events",
"nick": "evts"
},
...
"seg": {
"type": 2,
"description": "Segments",
"nick": "segs"
},
...
},
"mds_server": {
"handle_client_request": {
"type": 10,
"description": "Client requests",
"nick": "hcr"
},
"handle_slave_request": {
"type": 10,
"description": "Slave requests",
"nick": "hsr"
},
"handle_client_session": {
"type": 10,
"description": "Client session messages",
"nick": "hcs"
},
...
},
"objecter": {
"op_active": {
"type": 2,
"description": "Operations active",
"nick": "actv"
},
"op_r": {
"type": 10,
"description": "Read operations",
"nick": "read"
},
"op_w": {
"type": 10,
"description": "Write operations",
"nick": "writ"
},
...
},
}

schema输出里type字段的含义:type是下面perfcounter_type_d枚举值的按位或,例如 2 = PERFCOUNTER_U64,5 = PERFCOUNTER_TIME | PERFCOUNTER_LONGRUNAVG,10 = PERFCOUNTER_U64 | PERFCOUNTER_COUNTER。

文件:common/perf_counters.h

enum perfcounter_type_d
{
PERFCOUNTER_NONE = 0,
PERFCOUNTER_TIME = 0x1,
PERFCOUNTER_U64 = 0x2,
PERFCOUNTER_LONGRUNAVG = 0x4,
PERFCOUNTER_COUNTER = 0x8,
};

class PerfCounters
{
/** Represents a PerfCounters data element. */
struct perf_counter_data_any_d {
...
const char *name;
const char *description;
const char *nick;
enum perfcounter_type_d type;
atomic64_t u64;
atomic64_t avgcount;
atomic64_t avgcount2;
...
};
...
};

ceph daemon <asok> perf dump中与daemonperf相关的输出项:

ceph daemon <mds-asok> perf dump
{
"mds": {
...
"reply_latency": {
"avgcount": 1879241,
"sum": 563.417555661
},
...
"inodes": 510943,
...
"caps": 68002,
...
},
"mds_cache": {
"num_strays": 415633,
...
"strays_purged": 1192320,
...
"num_recovering_enqueued": 0,
...
"recovery_completed": 9
},
"mds_log": {
"evadd": 2094629,
...
"ev": 42664,
...
"seg": 57,
...
},
"mds_server": {
"handle_client_request": 1879915,
"handle_slave_request": 0,
"handle_client_session": 698,
...
},
"objecter": {
"op_active": 64,
...
"op_r": 832,
"op_w": 1219634,
...
},
}
支持原创