Ceph osd journal测试

创建pool

To create a pool, execute:

ceph osd pool create {pool-name} {pg-num} [{pgp-num}] [replicated] \
[crush-ruleset-name]
ceph osd pool create {pool-name} {pg-num} {pgp-num} erasure \
[erasure-code-profile] [crush-ruleset-name]

# rados mkpool testpool
或者:
# ceph osd pool create testpool 1024 1024 replicated

测试journal文件rename

  1. put一个object

    rados put testobj ~/truck-dis-6.5.tar.gz --pool=testpool
    
  2. 获取object的map信息

    ceph osd map testpool testobj
    
  3. 到对应的osd节点上,把journal文件重命名为任意其他名字

    mv journal bak.journal
    
  4. get这个object

    rados get testobj tst.tar.gz --pool=testpool
    
  5. 对比get到的object和源object是否一致

    diff tst.tar.gz ~/truck-dis-6.5.tar.gz
    
  6. 删除该object

    rados rm testobj --pool=testpool 
    
  7. 重新put该object

    rados put testobj ~/truck-dis-6.5.tar.gz --pool=testpool
    
  8. get这个object

    rados get testobj tst.tar.gz --pool=testpool
    
  9. 对比get到的object和源object是否一致

    diff tst.tar.gz ~/truck-dis-6.5.tar.gz
    

结论

测试发现:即使把osd的journal文件mv成新的文件名,osd的写操作仍然会更新到这个(已被重命名的)journal文件中。感觉是osd进程一直持有journal文件的文件句柄(fd),因此重命名并不影响已经打开的文件。

FileJournal定义:

class FileJournal : public Journal {
...
int fd;
...
Mutex writeq_lock;
Cond writeq_cond;
deque<write_item> writeq;
...
class Writer : public Thread {
FileJournal *journal;
public:
Writer(FileJournal *fj) : journal(fj) {}
void *entry() {
journal->write_thread_entry();
return 0;
}
} write_thread;
...
}

journal初始化fd:

OSD::init() -> FileStore::mount() -> JournalingObjectStore::journal_replay() -> FileJournal::open() -> FileJournal::_open()

journal close fd:

OSD::shutdown() -> FileStore::umount() -> JournalingObjectStore::journal_write_close() -> FileJournal::close()

journal写数据:

FileJournal.Writer.entry() -> FileJournal::write_thread_entry() -> FileJournal::do_write() -> FileJournal::write_bl()

模拟journal文件不可读

后续把某个osd的journal文件配置到另一块硬盘上,然后把journal所在的硬盘置为offline,此时写object返回error,该osd变为down状态。

echo offline > /sys/block/sdm/device/state
echo running > /sys/block/sdm/device/state

mount -t xfs /dev/sdm /data/cache/osd27 -o rw,noatime,inode64,logbsize=256k,delaylog

service ceph restart osd.27

分析journal文件

journal文件其实就是在创建的时候,在文件最前面写入一个header:

struct header_t {
enum {
FLAG_CRC = (1<<0),
// NOTE: remove kludgey weirdness in read_header() next time a flag is added.
};

uint64_t flags;
uuid_d fsid;
__u32 block_size;
__u32 alignment;
int64_t max_size; // max size of journal ring buffer
int64_t start; // offset of first entry
uint64_t committed_up_to; // committed up to

验证:

OSD journal get fsid command:
# ceph-osd -i 58 --get-journal-fsid
4ac9b14d-652b-4480-a158-8837d67d1651

# xxd -l 4096 journal myj
# head -n 5 myj
0000000: 0400 0000 4000 0000 0100 0000 0000 0000 ....@...........
0000010: 4ac9 b14d 652b 4480 a158 8837 d67d 1651 J..Me+D..X.7.}.Q
0000020: 0010 0000 0010 0000 0000 0071 0200 0000 ...........q....
0000030: 0090 9d4e 0000 0000 eeea 0000 0000 0000 ...N............
0000040: efea 0000 0000 0000 0000 0000 0000 0000 ................

重做journal

重新make journal:

ceph-osd -i 58 --mkjournal

这个命令可以创建一个osd的journal文件:它会读取ceph配置,在osd的目录下生成journal文件;如果journal文件已经存在,则会失败。

然后用新的journal文件重启osd service成功。

ceph journal其他命令

osd journal flush command:

# ceph-osd -i 58 --flush-journal
2015-05-14 11:18:05.019196 7f175b62b800 -1 os/FileJournal.cc:95 journal FileJournal::_open: disabling aio for non-block journal. Use journal_force_aio to force use of aio anyway
2015-05-14 11:18:05.212845 7f175b62b800 -1 ceph_osd.cc:271 flushed journal /data/cache/osd58/journal for object store /data/cache/osd58

OSD journal dump command:

# ceph-osd -i 58 --dump-journal
2015-05-14 11:28:55.490619 7f1125980800 -1 common/admin_socket.cc:512 asok(0x4ab0230) AdminSocketConfigObs::init: failed: AdminSocket::bind_and_listen: failed to bind the UNIX domain socket to '/var/run/ceph/ceph-osd.58.asok': (17) File exists
2015-05-14 11:28:55.490695 7f1125980800 -1 os/FileJournal.cc:95 journal FileJournal::_open: disabling aio for non-block journal. Use journal_force_aio to force use of aio anyway
[
{ "offset": 1411604480,
"seq": 93784,
"transactions": [
{ "trans_num": 0,
"ops": [
{ "op_num": 0,
"op_name": "omap_setkeys",
"collection": "meta",
"oid": "51ad36\/pglog_5.4ad\/0\/\/-1",
"attr_lens": { "0000000179.00000000000000000080": 186}},
{ "op_num": 1,
"op_name": "omap_setkeys",
"collection": "meta",
"oid": "16ef7597\/infos\/head\/\/-1",
"attr_lens": { "5.4ad_epoch": 4,
"5.4ad_info": 721}},
{ "op_num": 2,
"op_name": "omap_rmkeys",
"collection": "meta",
"oid": "51ad36\/pglog_5.4ad\/0\/\/-1"},
{ "op_num": 3,
"op_name": "omap_setkeys",
"collection": "meta",
"oid": "51ad36\/pglog_5.4ad\/0\/\/-1",
"attr_lens": { "0000000179.00000000000000000080": 186,
"can_rollback_to": 12}},
{ "op_num": 4,
"op_name": "touch",
"collection": "5.4ad_head",
"oid": "546bdcad\/default.4250.345_health_check\/18_1431573984.96\/head\/\/5"},
{ "op_num": 5,
"op_name": "setattr",
"collection": "5.4ad_head",
"oid": "546bdcad\/default.4250.345_health_check\/18_1431573984.96\/head\/\/5",
"name": "_user.rgw.idtag",
"length": 19},
{ "op_num": 6,
"op_name": "write",
"collection": "5.4ad_head",
"oid": "546bdcad\/default.4250.345_health_check\/18_1431573984.96\/head\/\/5",
"length": 12,
"offset": 0,
"bufferlist length": 12},
{ "op_num": 7,
"op_name": "setattr",
"collection": "5.4ad_head",
"oid": "546bdcad\/default.4250.345_health_check\/18_1431573984.96\/head\/\/5",
"name": "_user.rgw.acl",
"length": 39},
{ "op_num": 8,
"op_name": "setattr",
"collection": "5.4ad_head",
"oid": "546bdcad\/default.4250.345_health_check\/18_1431573984.96\/head\/\/5",
"name": "_user.rgw.etag",
"length": 33},
{ "op_num": 9,
"op_name": "setattr",
"collection": "5.4ad_head",
"oid": "546bdcad\/default.4250.345_health_check\/18_1431573984.96\/head\/\/5",
"name": "_user.rgw.total_size",
"length": 3},
{ "op_num": 10,
"op_name": "setattr",
"collection": "5.4ad_head",
"oid": "546bdcad\/default.4250.345_health_check\/18_1431573984.96\/head\/\/5",
"name": "_",
"length": 273},
{ "op_num": 11,
"op_name": "setattr",
"collection": "5.4ad_head",
"oid": "546bdcad\/default.4250.345_health_check\/18_1431573984.96\/head\/\/5",
"name": "snapset",
"length": 31}]}]}2015-05-14 11:28:55.506607 7f1125980800 -1 ceph_osd.cc:285 dumped journal /data/cache/osd58/journal for object store /data/cache/osd58
支持原创