[pve-devel] corosync bug: cluster break after 1 node clean shutdown

Alexandre DERUMIER aderumier at odiso.com
Wed Sep 16 10:30:29 CEST 2020


Backtrace of all threads.
(I don't have libqb or fuse debug symbols.)


(gdb) info threads
  Id   Target Id                                    Frame 
* 1    Thread 0x7fce63d5f900 (LWP 16239) "pmxcfs"   0x00007fce67721896 in do_futex_wait.constprop () from /lib/x86_64-linux-gnu/libpthread.so.0
  2    Thread 0x7fce63ce6700 (LWP 16240) "cfs_loop" 0x00007fce676497ef in epoll_wait () from /lib/x86_64-linux-gnu/libc.so.6
  3    Thread 0x7fce618c9700 (LWP 16246) "server"   0x00007fce676497ef in epoll_wait () from /lib/x86_64-linux-gnu/libc.so.6
  4    Thread 0x7fce46beb700 (LWP 30256) "pmxcfs"   0x00007fce67643f59 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
  5    Thread 0x7fce2abf9700 (LWP 43943) "pmxcfs"   0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
  6    Thread 0x7fce610a6700 (LWP 13346) "pmxcfs"   0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
  7    Thread 0x7fce2a3f8700 (LWP 8832) "pmxcfs"    0x00007fce67643f59 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
  8    Thread 0x7fce28bf5700 (LWP 3464) "pmxcfs"    0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
  9    Thread 0x7fce453e8700 (LWP 3727) "pmxcfs"    0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
  10   Thread 0x7fce2bfff700 (LWP 6705) "pmxcfs"    0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
  11   Thread 0x7fce293f6700 (LWP 41454) "pmxcfs"   0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
  12   Thread 0x7fce45be9700 (LWP 17734) "pmxcfs"   0x00007fce67643f59 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
  13   Thread 0x7fce2b7fe700 (LWP 17762) "pmxcfs"   0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
  14   Thread 0x7fce463ea700 (LWP 2347) "pmxcfs"    0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
  15   Thread 0x7fce44be7700 (LWP 11335) "pmxcfs"   0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0


(gdb) thread 1
[Switching to thread 1 (Thread 0x7fce63d5f900 (LWP 16239))]
#0  0x00007fce67721896 in do_futex_wait.constprop () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67721896 in do_futex_wait.constprop () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce67721988 in __new_sem_wait_slow.constprop.0 () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#2  0x00007fce678c5f98 in fuse_session_loop_mt () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678cb577 in fuse_loop_mt () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00005617f6d5ab0e in main (argc=<optimized out>, argv=<optimized out>) at pmxcfs.c:1055
        ret = -1
        lockfd = <optimized out>
        pipefd = {8, 9}
        foreground = 0
        force_local_mode = 0
        wrote_pidfile = 1
        memdb = 0x5617f7c563b0
        dcdb = 0x5617f8046ca0
        status_fsm = 0x5617f806a630
        context = <optimized out>
        entries = {{long_name = 0x5617f6d7440f "debug", short_name = 100 'd', flags = 0, arg = G_OPTION_ARG_NONE, arg_data = 0x5617f6d82104 <cfs+20>, description = 0x5617f6d742cb "Turn on debug messages", arg_description = 0x0}, {
            long_name = 0x5617f6d742e2 "foreground", short_name = 102 'f', flags = 0, arg = G_OPTION_ARG_NONE, arg_data = 0x7ffd672edc38, description = 0x5617f6d742ed "Do not daemonize server", arg_description = 0x0}, {
            long_name = 0x5617f6d74305 "local", short_name = 108 'l', flags = 0, arg = G_OPTION_ARG_NONE, arg_data = 0x7ffd672edc3c, description = 0x5617f6d746a0 "Force local mode (ignore corosync.conf, force quorum)", 
            arg_description = 0x0}, {long_name = 0x0, short_name = 0 '\000', flags = 0, arg = G_OPTION_ARG_NONE, arg_data = 0x0, description = 0x0, arg_description = 0x0}}
        err = 0x0
        __func__ = "main"
        utsname = {sysname = "Linux", '\000' <repeats 59 times>, nodename = "m6kvm1", '\000' <repeats 58 times>, release = "5.4.60-1-pve", '\000' <repeats 52 times>, 
          version = "#1 SMP PVE 5.4.60-1 (Mon, 31 Aug 2020 10:36:22 +0200)", '\000' <repeats 11 times>, machine = "x86_64", '\000' <repeats 58 times>, __domainname = "(none)", '\000' <repeats 58 times>}
        dot = <optimized out>
        www_data = <optimized out>
        create = <optimized out>
        conf_data = 0x5617f80466a0
        len = <optimized out>
        config = <optimized out>
        bplug = <optimized out>
        fa = {0x5617f6d7441e "-f", 0x5617f6d74421 "-odefault_permissions", 0x5617f6d74437 "-oallow_other", 0x0}
        fuse_args = {argc = 1, argv = 0x5617f8046960, allocated = 1}
        fuse_chan = 0x5617f7c560c0
        corosync_loop = 0x5617f80481d0
        service_quorum = 0x5617f8048460
        service_confdb = 0x5617f8069da0
        service_dcdb = 0x5617f806a5d0
        service_status = 0x5617f806a8e0



(gdb) thread 2
[Switching to thread 2 (Thread 0x7fce63ce6700 (LWP 16240))]
#0  0x00007fce676497ef in epoll_wait () from /lib/x86_64-linux-gnu/libc.so.6
(gdb) bt full
#0  0x00007fce676497ef in epoll_wait () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1  0x00007fce67500b2a in ?? () from /lib/x86_64-linux-gnu/libqb.so.0
No symbol table info available.
#2  0x00007fce674f1bb0 in qb_loop_run () from /lib/x86_64-linux-gnu/libqb.so.0
No symbol table info available.
#3  0x00005617f6d5cd31 in cfs_loop_worker_thread (data=0x5617f80481d0) at loop.c:330
        __func__ = "cfs_loop_worker_thread"
        loop = 0x5617f80481d0
        qbloop = 0x5617f8048230
        l = <optimized out>
        ctime = <optimized out>
        th = 7222815479134420993
#4  0x00007fce67968415 in ?? () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#5  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#6  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.

(gdb) thread 3
(gdb) bt full
#0  0x00007fce676497ef in epoll_wait () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1  0x00007fce67500b2a in ?? () from /lib/x86_64-linux-gnu/libqb.so.0
No symbol table info available.
#2  0x00007fce674f1bb0 in qb_loop_run () from /lib/x86_64-linux-gnu/libqb.so.0
No symbol table info available.
#3  0x00005617f6d5e453 in worker_thread (data=<optimized out>) at server.c:529
        th = 5863405772435619840
        __func__ = "worker_thread"
        _g_boolean_var_ = <optimized out>
#4  0x00007fce67968415 in ?? () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#5  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#6  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.

(gdb) thread 4
(gdb) bt full
#0  0x00007fce67643f59 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1  0x00007fce67989f9f in g_cond_wait () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#2  0x00005617f6d67f14 in dfsm_send_message_sync (dfsm=dfsm at entry=0x5617f8046ca0, msgtype=msgtype at entry=5, iov=iov at entry=0x7fce46bea870, len=len at entry=8, rp=rp at entry=0x7fce46bea860) at dfsm.c:339
        __func__ = "dfsm_send_message_sync"
        msgcount = 189465
        header = {base = {type = 0, subtype = 5, protocol_version = 1, time = 1600241342, reserved = 0}, count = 189465}
        real_iov = {{iov_base = 0x7fce46bea7e0, iov_len = 24}, {iov_base = 0x7fce46bea84c, iov_len = 4}, {iov_base = 0x7fce46bea940, iov_len = 4}, {iov_base = 0x7fce46bea858, iov_len = 4}, {iov_base = 0x7fce46bea85c, iov_len = 4}, {
            iov_base = 0x7fce46bea948, iov_len = 4}, {iov_base = 0x7fce1c002980, iov_len = 34}, {iov_base = 0x0, iov_len = 0}, {iov_base = 0x0, iov_len = 0}}
        result = CS_OK
#3  0x00005617f6d6491e in dcdb_send_fuse_message (dfsm=0x5617f8046ca0, msg_type=msg_type at entry=DCDB_MESSAGE_CFS_CREATE, path=0x7fce1c002980 "nodes/m6kvm1/lrm_status.tmp.42080", to=to at entry=0x0, buf=buf at entry=0x0, size=<optimized out>, 
    size at entry=0, offset=<optimized out>, flags=<optimized out>) at dcdb.c:157
        iov = {{iov_base = 0x7fce46bea84c, iov_len = 4}, {iov_base = 0x7fce46bea940, iov_len = 4}, {iov_base = 0x7fce46bea858, iov_len = 4}, {iov_base = 0x7fce46bea85c, iov_len = 4}, {iov_base = 0x7fce46bea948, iov_len = 4}, {
            iov_base = 0x7fce1c002980, iov_len = 34}, {iov_base = 0x0, iov_len = 0}, {iov_base = 0x0, iov_len = 0}}
        pathlen = 34
        tolen = 0
        rc = {msgcount = 189465, result = -16, processed = 0}
#4  0x00005617f6d6b640 in cfs_plug_memdb_create (mode=<optimized out>, fi=<optimized out>, path=<optimized out>, plug=0x5617f8048b70) at cfs-plug-memdb.c:307
        res = <optimized out>
        mdb = 0x5617f8048b70
        __func__ = "cfs_plug_memdb_create"
        res = <optimized out>
        mdb = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        ctime = <optimized out>
#5  cfs_plug_memdb_create (plug=0x5617f8048b70, path=<optimized out>, mode=<optimized out>, fi=<optimized out>) at cfs-plug-memdb.c:291
        res = <optimized out>
        mdb = <optimized out>
        __func__ = "cfs_plug_memdb_create"
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        ctime = <optimized out>
#6  0x00005617f6d5ca5d in cfs_fuse_create (path=0x7fce1c000bf0 "/nodes/m6kvm1/lrm_status.tmp.42080", mode=33188, fi=0x7fce46beab30) at pmxcfs.c:415
        __func__ = "cfs_fuse_create"
        ret = -13
        subpath = 0x7fce1c002980 "nodes/m6kvm1/lrm_status.tmp.42080"
        plug = <optimized out>
#7  0x00007fce678c152b in fuse_fs_create () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#8  0x00007fce678c165a in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#9  0x00007fce678c7bb7 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#10 0x00007fce678c90b8 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#11 0x00007fce678c5d5c in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#12 0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#13 0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.

(gdb) thread 5
[Switching to thread 5 (Thread 0x7fce2abf9700 (LWP 43943))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.


(gdb) thread 6
[Switching to thread 6 (Thread 0x7fce610a6700 (LWP 13346))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.

(gdb) thread 7
[Switching to thread 7 (Thread 0x7fce2a3f8700 (LWP 8832))]
#0  0x00007fce67643f59 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
(gdb) bt full
#0  0x00007fce67643f59 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1  0x00007fce67989f9f in g_cond_wait () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#2  0x00005617f6d67f14 in dfsm_send_message_sync (dfsm=dfsm at entry=0x5617f8046ca0, msgtype=msgtype at entry=5, iov=iov at entry=0x7fce2a3f7870, len=len at entry=8, rp=rp at entry=0x7fce2a3f7860) at dfsm.c:339
        __func__ = "dfsm_send_message_sync"
        msgcount = 189466
        header = {base = {type = 0, subtype = 5, protocol_version = 1, time = 1600242491, reserved = 0}, count = 189466}
        real_iov = {{iov_base = 0x7fce2a3f77e0, iov_len = 24}, {iov_base = 0x7fce2a3f784c, iov_len = 4}, {iov_base = 0x7fce2a3f7940, iov_len = 4}, {iov_base = 0x7fce2a3f7858, iov_len = 4}, {iov_base = 0x7fce2a3f785c, iov_len = 4}, {
            iov_base = 0x7fce2a3f7948, iov_len = 4}, {iov_base = 0x7fce1406f350, iov_len = 10}, {iov_base = 0x0, iov_len = 0}, {iov_base = 0x0, iov_len = 0}}
        result = CS_OK
#3  0x00005617f6d6491e in dcdb_send_fuse_message (dfsm=0x5617f8046ca0, msg_type=msg_type at entry=DCDB_MESSAGE_CFS_CREATE, path=0x7fce1406f350 "testalex2", to=to at entry=0x0, buf=buf at entry=0x0, size=<optimized out>, size at entry=0, 
    offset=<optimized out>, flags=<optimized out>) at dcdb.c:157
        iov = {{iov_base = 0x7fce2a3f784c, iov_len = 4}, {iov_base = 0x7fce2a3f7940, iov_len = 4}, {iov_base = 0x7fce2a3f7858, iov_len = 4}, {iov_base = 0x7fce2a3f785c, iov_len = 4}, {iov_base = 0x7fce2a3f7948, iov_len = 4}, {
            iov_base = 0x7fce1406f350, iov_len = 10}, {iov_base = 0x0, iov_len = 0}, {iov_base = 0x0, iov_len = 0}}
        pathlen = 10
        tolen = 0
        rc = {msgcount = 189466, result = -16, processed = 0}
#4  0x00005617f6d6b640 in cfs_plug_memdb_create (mode=<optimized out>, fi=<optimized out>, path=<optimized out>, plug=0x5617f8048b70) at cfs-plug-memdb.c:307
        res = <optimized out>
        mdb = 0x5617f8048b70
        __func__ = "cfs_plug_memdb_create"
        res = <optimized out>
        mdb = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        ctime = <optimized out>
#5  cfs_plug_memdb_create (plug=0x5617f8048b70, path=<optimized out>, mode=<optimized out>, fi=<optimized out>) at cfs-plug-memdb.c:291
        res = <optimized out>
        mdb = <optimized out>
        __func__ = "cfs_plug_memdb_create"
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        _g_boolean_var_ = <optimized out>
        ctime = <optimized out>
#6  0x00005617f6d5ca5d in cfs_fuse_create (path=0x7fce14022ff0 "/testalex2", mode=33188, fi=0x7fce2a3f7b30) at pmxcfs.c:415
        __func__ = "cfs_fuse_create"
        ret = -13
        subpath = 0x7fce1406f350 "testalex2"
        plug = <optimized out>
#7  0x00007fce678c152b in fuse_fs_create () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#8  0x00007fce678c165a in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#9  0x00007fce678c7bb7 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#10 0x00007fce678c90b8 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#11 0x00007fce678c5d5c in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#12 0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#13 0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.


(gdb) thread 8
[Switching to thread 8 (Thread 0x7fce28bf5700 (LWP 3464))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.


(gdb) thread 9
[Switching to thread 9 (Thread 0x7fce453e8700 (LWP 3727))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.


(gdb) thread 10
[Switching to thread 10 (Thread 0x7fce2bfff700 (LWP 6705))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.

(gdb) thread 11
[Switching to thread 11 (Thread 0x7fce293f6700 (LWP 41454))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.

(gdb) thread 12
[Switching to thread 12 (Thread 0x7fce45be9700 (LWP 17734))]
#0  0x00007fce67643f59 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
(gdb) bt full
#0  0x00007fce67643f59 in syscall () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
#1  0x00007fce67989f9f in g_cond_wait () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#2  0x00005617f6d67f14 in dfsm_send_message_sync (dfsm=dfsm at entry=0x5617f8046ca0, msgtype=msgtype at entry=2, iov=iov at entry=0x7fce45be88e0, len=len at entry=8, rp=rp at entry=0x7fce45be88d0) at dfsm.c:339
        __func__ = "dfsm_send_message_sync"
        msgcount = 189464
        header = {base = {type = 0, subtype = 2, protocol_version = 1, time = 1600241340, reserved = 0}, count = 189464}
        real_iov = {{iov_base = 0x7fce45be8850, iov_len = 24}, {iov_base = 0x7fce45be88bc, iov_len = 4}, {iov_base = 0x7fce45be89b0, iov_len = 4}, {iov_base = 0x7fce45be88c8, iov_len = 4}, {iov_base = 0x7fce45be88cc, iov_len = 4}, {
            iov_base = 0x7fce45be89b8, iov_len = 4}, {iov_base = 0x7fce34001dd0, iov_len = 31}, {iov_base = 0x0, iov_len = 0}, {iov_base = 0x0, iov_len = 0}}
        result = CS_OK
#3  0x00005617f6d6491e in dcdb_send_fuse_message (dfsm=0x5617f8046ca0, msg_type=msg_type at entry=DCDB_MESSAGE_CFS_MKDIR, path=0x7fce34001dd0 "priv/lock/file-replication_cfg", to=to at entry=0x0, buf=buf at entry=0x0, size=<optimized out>, 
    size at entry=0, offset=<optimized out>, flags=<optimized out>) at dcdb.c:157
        iov = {{iov_base = 0x7fce45be88bc, iov_len = 4}, {iov_base = 0x7fce45be89b0, iov_len = 4}, {iov_base = 0x7fce45be88c8, iov_len = 4}, {iov_base = 0x7fce45be88cc, iov_len = 4}, {iov_base = 0x7fce45be89b8, iov_len = 4}, {
            iov_base = 0x7fce34001dd0, iov_len = 31}, {iov_base = 0x0, iov_len = 0}, {iov_base = 0x0, iov_len = 0}}
        pathlen = 31
        tolen = 0
        rc = {msgcount = 189464, result = -16, processed = 0}
#4  0x00005617f6d6b0e7 in cfs_plug_memdb_mkdir (plug=0x5617f8048b70, path=<optimized out>, mode=<optimized out>) at cfs-plug-memdb.c:329
        __func__ = "cfs_plug_memdb_mkdir"
        res = <optimized out>
        mdb = 0x5617f8048b70
#5  0x00005617f6d5bc70 in cfs_fuse_mkdir (path=0x7fce34002240 "/priv/lock/file-replication_cfg", mode=493) at pmxcfs.c:238
        __func__ = "cfs_fuse_mkdir"
        ret = -13
        subpath = 0x7fce34001dd0 "priv/lock/file-replication_cfg"
        plug = <optimized out>
#6  0x00007fce678c2ccb in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#7  0x00007fce678c90b8 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#8  0x00007fce678c5d5c in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#9  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#10 0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.

(gdb) thread 13
[Switching to thread 13 (Thread 0x7fce2b7fe700 (LWP 17762))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.


(gdb) thread 14
[Switching to thread 14 (Thread 0x7fce463ea700 (LWP 2347))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.


(gdb) thread 15
[Switching to thread 15 (Thread 0x7fce44be7700 (LWP 11335))]
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
(gdb) bt full
#0  0x00007fce67722544 in read () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#1  0x00007fce678c56d2 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#2  0x00007fce678c7249 in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#3  0x00007fce678c5cdf in ?? () from /lib/x86_64-linux-gnu/libfuse.so.2
No symbol table info available.
#4  0x00007fce67718fa3 in start_thread () from /lib/x86_64-linux-gnu/libpthread.so.0
No symbol table info available.
#5  0x00007fce676494cf in clone () from /lib/x86_64-linux-gnu/libc.so.6
No symbol table info available.
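
Note: the interesting threads are 4, 7 and 12. All three are blocked in dfsm_send_message_sync() (dfsm.c:339) inside g_cond_wait(): the CPG multicast itself returned CS_OK, but the reply record still shows processed = 0 (result = -16 in all three), so the condition is apparently never signalled and the FUSE workers calling cfs_fuse_create()/cfs_fuse_mkdir() stay stuck, which is why /etc/pve hangs. Roughly, the wait pattern looks like this (a simplified sketch based only on what is visible in the backtraces, not the actual dfsm.c code; names are illustrative):

/* simplified sketch -- NOT the real pmxcfs source */
#include <glib.h>

typedef struct {
    guint64  msgcount;   /* e.g. 189465 / 189466 in the backtraces        */
    int      result;     /* -16 in all three backtraces                   */
    gboolean processed;  /* still 0: the reply was never marked processed */
} sync_result_sketch_t;

static GMutex sketch_mutex;
static GCond  sketch_cond;

static int
send_message_sync_sketch(sync_result_sketch_t *rc)
{
    /* 1. multicast the request through corosync CPG -- frame #2 shows
     *    this part succeeded (result = CS_OK)                           */
    /* cpg_mcast_joined(...); */

    /* 2. wait until the reply is marked processed (presumably by the
     *    CPG deliver callback).  If that callback never fires -- e.g.
     *    because the dfsm got confused around the corosync restart --
     *    this wait never returns and the calling FUSE worker stays
     *    stuck, which is exactly what threads 4, 7 and 12 show.         */
    g_mutex_lock(&sketch_mutex);
    while (!rc->processed)
        g_cond_wait(&sketch_cond, &sketch_mutex);
    g_mutex_unlock(&sketch_mutex);

    return rc->result;
}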

----- Original Message -----
From: "aderumier" <aderumier at odiso.com>
To: "Proxmox VE development discussion" <pve-devel at lists.proxmox.com>
Cc: "Thomas Lamprecht" <t.lamprecht at proxmox.com>
Sent: Wednesday, 16 September 2020 09:58:02
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown

Here is a backtrace with pve-cluster-dbgsym installed:


(gdb) bt full 
#0 0x00007fce67721896 in do_futex_wait.constprop () from /lib/x86_64-linux-gnu/libpthread.so.0 
No symbol table info available. 
#1 0x00007fce67721988 in __new_sem_wait_slow.constprop.0 () from /lib/x86_64-linux-gnu/libpthread.so.0 
No symbol table info available. 
#2 0x00007fce678c5f98 in fuse_session_loop_mt () from /lib/x86_64-linux-gnu/libfuse.so.2 
No symbol table info available. 
#3 0x00007fce678cb577 in fuse_loop_mt () from /lib/x86_64-linux-gnu/libfuse.so.2 
No symbol table info available. 
#4 0x00005617f6d5ab0e in main (argc=<optimized out>, argv=<optimized out>) at pmxcfs.c:1055 
ret = -1 
lockfd = <optimized out> 
pipefd = {8, 9} 
foreground = 0 
force_local_mode = 0 
wrote_pidfile = 1 
memdb = 0x5617f7c563b0 
dcdb = 0x5617f8046ca0 
status_fsm = 0x5617f806a630 
context = <optimized out> 
entries = {{long_name = 0x5617f6d7440f "debug", short_name = 100 'd', flags = 0, arg = G_OPTION_ARG_NONE, arg_data = 0x5617f6d82104 <cfs+20>, description = 0x5617f6d742cb "Turn on debug messages", arg_description = 0x0}, { 
long_name = 0x5617f6d742e2 "foreground", short_name = 102 'f', flags = 0, arg = G_OPTION_ARG_NONE, arg_data = 0x7ffd672edc38, description = 0x5617f6d742ed "Do not daemonize server", arg_description = 0x0}, { 
long_name = 0x5617f6d74305 "local", short_name = 108 'l', flags = 0, arg = G_OPTION_ARG_NONE, arg_data = 0x7ffd672edc3c, description = 0x5617f6d746a0 "Force local mode (ignore corosync.conf, force quorum)", 
arg_description = 0x0}, {long_name = 0x0, short_name = 0 '\000', flags = 0, arg = G_OPTION_ARG_NONE, arg_data = 0x0, description = 0x0, arg_description = 0x0}} 
err = 0x0 
__func__ = "main" 
utsname = {sysname = "Linux", '\000' <repeats 59 times>, nodename = "m6kvm1", '\000' <repeats 58 times>, release = "5.4.60-1-pve", '\000' <repeats 52 times>, 
version = "#1 SMP PVE 5.4.60-1 (Mon, 31 Aug 2020 10:36:22 +0200)", '\000' <repeats 11 times>, machine = "x86_64", '\000' <repeats 58 times>, __domainname = "(none)", '\000' <repeats 58 times>} 
dot = <optimized out> 
www_data = <optimized out> 
create = <optimized out> 
conf_data = 0x5617f80466a0 
len = <optimized out> 
config = <optimized out> 
bplug = <optimized out> 
fa = {0x5617f6d7441e "-f", 0x5617f6d74421 "-odefault_permissions", 0x5617f6d74437 "-oallow_other", 0x0} 
fuse_args = {argc = 1, argv = 0x5617f8046960, allocated = 1} 
fuse_chan = 0x5617f7c560c0 
corosync_loop = 0x5617f80481d0 
service_quorum = 0x5617f8048460 
service_confdb = 0x5617f8069da0 
service_dcdb = 0x5617f806a5d0 
service_status = 0x5617f806a8e0 


----- Original Message -----
From: "aderumier" <aderumier at odiso.com>
To: "Proxmox VE development discussion" <pve-devel at lists.proxmox.com>
Cc: "Thomas Lamprecht" <t.lamprecht at proxmox.com>
Sent: Wednesday, 16 September 2020 09:34:27
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown

Hi, 

I have reproduced the problem now,

and this time I have not restarted corosync a second time after the lock of /etc/pve,

so it is currently read-only.


I haven't used gdb in a long time;

could you tell me how to attach to the running pmxcfs process, and which gdb commands to run?
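
(For reference, a minimal way to do that, assuming pve-cluster-dbgsym is installed, is to attach with "gdb -p <pid of pmxcfs>" and then run something like:

(gdb) set pagination off
(gdb) info threads
(gdb) thread apply all bt full
(gdb) detach

just a sketch of the usual procedure, not necessarily the exact commands used for the backtraces above.)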

----- Original Message -----
From: "aderumier" <aderumier at odiso.com>
To: "Proxmox VE development discussion" <pve-devel at lists.proxmox.com>
Cc: "Thomas Lamprecht" <t.lamprecht at proxmox.com>
Sent: Tuesday, 15 September 2020 17:58:33
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown

Another small lock at 17:41:09.

To be sure, I have set up a small loop on node2 that writes to /etc/pve every second.

It hangs at the first corosync restart; then, on the second corosync restart, it works again.

I'll try to improve this tomorrow to be able to debug the corosync process:
- restart corosync
- do some writes in /etc/pve/
- and if it's hanging, don't restart corosync again
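
(The write loop is basically just, as a sketch:

while true; do echo "Current time : $(date +%H:%M:%S)"; echo test > /etc/pve/test; sleep 1; done

the exact script may differ, but it only prints a timestamp and writes a test file to /etc/pve every second.)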



node2: echo test > /etc/pve/test loop 
-------------------------------------- 
Current time : 17:41:01 
Current time : 17:41:02 
Current time : 17:41:03 
Current time : 17:41:04 
Current time : 17:41:05 
Current time : 17:41:06 
Current time : 17:41:07 
Current time : 17:41:08 
Current time : 17:41:09 

hang 

Current time : 17:42:05 
Current time : 17:42:06 
Current time : 17:42:07 



node1 
----- 
Sep 15 17:41:08 m6kvm1 corosync[18145]: [KNET ] pmtud: PMTUD completed for host: 6 link: 0 current link mtu: 1397 
Sep 15 17:41:08 m6kvm1 corosync[18145]: [KNET ] pmtud: Starting PMTUD for host: 10 link: 0 
Sep 15 17:41:08 m6kvm1 corosync[18145]: [KNET ] udp: detected kernel MTU: 1500 
Sep 15 17:41:08 m6kvm1 corosync[18145]: [TOTEM ] Knet pMTU change: 1397 
Sep 15 17:41:08 m6kvm1 corosync[18145]: [KNET ] pmtud: PMTUD link change for host: 10 link: 0 from 469 to 1397 
Sep 15 17:41:08 m6kvm1 corosync[18145]: [KNET ] pmtud: PMTUD completed for host: 10 link: 0 current link mtu: 1397 
Sep 15 17:41:08 m6kvm1 corosync[18145]: [KNET ] pmtud: Global data MTU changed to: 1397 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] IPC credentials authenticated (/dev/shm/qb-18145-16239-31-zx6KJM/qb) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] connecting to client [16239] 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] connection created 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QUORUM] lib_init_fn: conn=0x556c2918d5f0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QUORUM] got quorum_type request on 0x556c2918d5f0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QUORUM] got trackstart request on 0x556c2918d5f0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QUORUM] sending initial status to 0x556c2918d5f0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QUORUM] sending quorum notification to 0x556c2918d5f0, length = 52 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] IPC credentials authenticated (/dev/shm/qb-18145-16239-32-I7ZZ6e/qb) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] connecting to client [16239] 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] connection created 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CMAP ] lib_init_fn: conn=0x556c2918ef20 
Sep 15 17:41:09 m6kvm1 pmxcfs[16239]: [status] notice: update cluster info (cluster name m6kvm, version = 20) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] IPC credentials authenticated (/dev/shm/qb-18145-16239-33-6RKbvH/qb) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] connecting to client [16239] 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] connection created 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] lib_init_fn: conn=0x556c2918ad00, cpd=0x556c2918b50c 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] IPC credentials authenticated (/dev/shm/qb-18145-16239-34-GAY5T9/qb) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] connecting to client [16239] 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [QB ] shm size:1048589; real_size:1052672; rb->word_size:263168 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] connection created 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] lib_init_fn: conn=0x556c2918c740, cpd=0x556c2918ce8c 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] Creating commit token because I am the rep. 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] Saving state aru 5 high seq received 5 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Storing new sequence id for ring 1197 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] entering COMMIT state. 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] got commit token 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] entering RECOVERY state. 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] TRANS [0] member 1: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [0] member 1: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (1.1193) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 5 high delivered 5 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [1] member 2: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [2] member 3: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [3] member 4: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [4] member 5: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [5] member 6: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [6] member 7: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [7] member 8: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [8] member 9: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [9] member 10: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [10] member 11: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [11] member 12: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [12] member 13: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] position [13] member 14: 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] Did not need to originate any messages in recovery. 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] got commit token 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] Sending initial ORF token 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] token retrans flag is 0 my set retrans flag0 retrans queue empty 1 count 0, aru 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] install seq 0 aru 0 high seq received 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] token retrans flag is 0 my set retrans flag0 retrans queue empty 1 count 1, aru 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] install seq 0 aru 0 high seq received 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] token retrans flag is 0 my set retrans flag0 retrans queue empty 1 count 2, aru 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] install seq 0 aru 0 high seq received 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] token retrans flag is 0 my set retrans flag0 retrans queue empty 1 count 3, aru 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] install seq 0 aru 0 high seq received 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] retrans flag count 4 token aru 0 install seq 0 aru 0 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] Resetting old ring state 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] recovery to regular 1-0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] waiting_trans_ack changed to 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.90) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.91) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.92) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.93) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.94) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.95) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.96) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.97) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.107) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.108) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.109) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.110) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [MAIN ] Member joined: r(0) ip(10.3.94.111) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [SYNC ] call init for locally known services 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] entering OPERATIONAL state. 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [TOTEM ] A new membership (1.1197) was formed. Members joined: 2 3 4 5 6 7 8 9 10 11 12 13 14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [SYNC ] enter sync process 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [SYNC ] Committing synchronization for corosync configuration map access 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 2 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 3 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 4 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 5 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 6 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 7 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 8 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 9 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 10 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 11 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 12 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] got joinlist message from node 13 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [SYNC ] Committing synchronization for corosync cluster closed process group service v1.01 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] my downlist: members(old:1 left:0) 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[0] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.110) , pid:30209 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[1] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.110) , pid:30209 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[2] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.109) , pid:31350 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[3] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.109) , pid:31350 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[4] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.108) , pid:3569 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[5] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.108) , pid:3569 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[6] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.107) , pid:19504 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[7] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.107) , pid:19504 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[8] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.97) , pid:11947 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[9] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.97) , pid:11947 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[10] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.96) , pid:20814 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[11] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.96) , pid:20814 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[12] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.95) , pid:39420 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[13] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.95) , pid:39420 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[14] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.94) , pid:12452 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[15] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.94) , pid:12452 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[16] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.93) , pid:44300 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[17] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.93) , pid:44300 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[18] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.92) , pid:42259 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[19] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.92) , pid:42259 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[20] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.91) , pid:40630 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[21] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.91) , pid:40630 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[22] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.90) , pid:25870 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[23] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.90) , pid:25870 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[24] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.111) , pid:25634 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [CPG ] joinlist_messages[25] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.111) , pid:25634 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] flags: quorate: No Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] Sending nodelist callback. ring_id = 1.1197 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 13 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] nodeinfo message[13]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] total_votes=2, expected_votes=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 13 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 1 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 13 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] nodeinfo message[14]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] total_votes=3, expected_votes=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 13 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 14 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 1 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] nodeinfo message[1]: votes: 1, expected: 14 flags: 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] flags: quorate: No Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] total_votes=3, expected_votes=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 13 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 14 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 1 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 2 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] nodeinfo message[2]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] total_votes=4, expected_votes=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 2 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 13 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 14 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] node 1 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 2 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm1 corosync[18145]: [VOTEQ ] got nodeinfo message from cluster node 3 
.... 
.... 
next corosync restart 

Sep 15 17:42:03 m6kvm1 corosync[18145]: [MAIN ] Node was shut down by a signal 
Sep 15 17:42:03 m6kvm1 corosync[18145]: [SERV ] Unloading all Corosync service engines. 
Sep 15 17:42:03 m6kvm1 corosync[18145]: [QB ] withdrawing server sockets 
Sep 15 17:42:03 m6kvm1 corosync[18145]: [QB ] qb_ipcs_unref() - destroying 
Sep 15 17:42:03 m6kvm1 corosync[18145]: [SERV ] Service engine unloaded: corosync vote quorum service v1.0 
Sep 15 17:42:03 m6kvm1 corosync[18145]: [QB ] qb_ipcs_disconnect(/dev/shm/qb-18145-16239-32-I7ZZ6e/qb) state:2 
Sep 15 17:42:03 m6kvm1 pmxcfs[16239]: [confdb] crit: cmap_dispatch failed: 2 
Sep 15 17:42:03 m6kvm1 corosync[18145]: [MAIN ] cs_ipcs_connection_closed() 
Sep 15 17:42:03 m6kvm1 corosync[18145]: [CMAP ] exit_fn for conn=0x556c2918ef20 
Sep 15 17:42:03 m6kvm1 corosync[18145]: [MAIN ] cs_ipcs_connection_destroyed() 


node2 
----- 



Sep 15 17:41:05 m6kvm2 corosync[25411]: [KNET ] pmtud: Starting PMTUD for host: 10 link: 0 
Sep 15 17:41:05 m6kvm2 corosync[25411]: [KNET ] udp: detected kernel MTU: 1500 
Sep 15 17:41:05 m6kvm2 corosync[25411]: [KNET ] pmtud: PMTUD completed for host: 10 link: 0 current link mtu: 1397 
Sep 15 17:41:07 m6kvm2 corosync[25411]: [KNET ] rx: host: 1 link: 0 received pong: 2 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [TOTEM ] entering GATHER state from 11(merge during join). 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:08 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: Source host 1 not reachable yet. Discarding packet. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] rx: host: 1 link: 0 is up 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] Knet host change callback. nodeid: 1 reachable: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] host: host: 1 (passive) best link: 0 (pri: 1) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] pmtud: Starting PMTUD for host: 1 link: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] udp: detected kernel MTU: 1500 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [KNET ] pmtud: PMTUD completed for host: 1 link: 0 current link mtu: 1397 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] got commit token 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] Saving state aru 123 high seq received 123 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [MAIN ] Storing new sequence id for ring 1197 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] entering COMMIT state. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] got commit token 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] entering RECOVERY state. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [0] member 2: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [1] member 3: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [2] member 4: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [3] member 5: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [4] member 6: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [5] member 7: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [6] member 8: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [7] member 9: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [8] member 10: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [9] member 11: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [10] member 12: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [11] member 13: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] TRANS [12] member 14: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [0] member 1: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (1.1193) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 5 high delivered 5 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [1] member 2: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [2] member 3: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [3] member 4: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [4] member 5: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [5] member 6: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [6] member 7: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [7] member 8: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [8] member 9: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [9] member 10: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [10] member 11: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [11] member 12: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [12] member 13: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] position [13] member 14: 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] previous ringid (2.1192) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] aru 123 high delivered 123 received flag 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] Did not need to originate any messages in recovery. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] token retrans flag is 0 my set retrans flag0 retrans queue empty 1 count 0, aru ffffffff 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] install seq 0 aru 0 high seq received 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] token retrans flag is 0 my set retrans flag0 retrans queue empty 1 count 1, aru 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] install seq 0 aru 0 high seq received 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] token retrans flag is 0 my set retrans flag0 retrans queue empty 1 count 2, aru 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] install seq 0 aru 0 high seq received 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] token retrans flag is 0 my set retrans flag0 retrans queue empty 1 count 3, aru 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] install seq 0 aru 0 high seq received 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] retrans flag count 4 token aru 0 install seq 0 aru 0 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] Resetting old ring state 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] recovery to regular 1-0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] waiting_trans_ack changed to 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [MAIN ] Member joined: r(0) ip(10.3.94.89) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [SYNC ] call init for locally known services 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] entering OPERATIONAL state. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] A new membership (1.1197) was formed. Members joined: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [SYNC ] enter sync process 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [SYNC ] Committing synchronization for corosync configuration map access 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CMAP ] Not first sync -> no action 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 3 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 4 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 5 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 6 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 7 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 8 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 9 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 10 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 11 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 12 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] downlist left_list: 0 received 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got joinlist message from node 13 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [SYNC ] Committing synchronization for corosync cluster closed process group service v1.01 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] my downlist: members(old:13 left:0) 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[0] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.110) , pid:30209 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[1] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.110) , pid:30209 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[2] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.109) , pid:31350 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[3] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.109) , pid:31350 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[4] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.108) , pid:3569 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[5] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.108) , pid:3569 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[6] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.107) , pid:19504 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[7] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.107) , pid:19504 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[8] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.97) , pid:11947 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[9] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.97) , pid:11947 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[10] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.96) , pid:20814 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[11] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.96) , pid:20814 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[12] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.95) , pid:39420 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[13] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.95) , pid:39420 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[14] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.94) , pid:12452 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[15] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.94) , pid:12452 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[16] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.93) , pid:44300 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[17] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.93) , pid:44300 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[18] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.92) , pid:42259 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[19] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.92) , pid:42259 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[20] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.91) , pid:40630 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[21] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.91) , pid:40630 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[22] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.90) , pid:25870 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[23] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.90) , pid:25870 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[24] group:pve_kvstore_v1\x00, ip:r(0) ip(10.3.94.111) , pid:25634 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] joinlist_messages[25] group:pve_dcdb_v1\x00, ip:r(0) ip(10.3.94.111) , pid:25634 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] Sending nodelist callback. ring_id = 1.1197 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 13 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[13]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 13 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[14]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[1]: votes: 1, expected: 14 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: No Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] total_votes=14, expected_votes=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 1 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 3 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 4 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 5 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 6 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 7 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 8 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 9 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 10 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 11 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 12 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 13 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 14 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 2 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] lowest node id: 1 us: 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] highest node id: 14 us: 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[2]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] total_votes=14, expected_votes=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 1 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 3 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 4 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 5 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 6 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 7 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 8 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 9 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 10 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 11 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 12 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 13 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 14 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 2 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] lowest node id: 1 us: 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] highest node id: 14 us: 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 3 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[3]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 3 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 4 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[4]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 4 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 5 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[5]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 5 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 6 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[6]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 6 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 7 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[7]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 7 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 8 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[8]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 8 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 9 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[9]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 9 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 10 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[10]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 10 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 11 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[11]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 11 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 12 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[12]: votes: 1, expected: 14 flags: 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] flags: quorate: Yes Leaving: No WFA Status: No First: No Qdevice: No QdeviceAlive: No QdeviceCastVote: No QdeviceMasterWins: No 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] got nodeinfo message from cluster node 12 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] nodeinfo message[0]: votes: 0, expected: 0 flags: 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [SYNC ] Committing synchronization for corosync vote quorum service v1.0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] total_votes=14, expected_votes=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 1 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 3 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 4 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 5 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 6 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 7 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 8 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 9 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 10 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 11 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 12 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 13 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 14 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] node 2 state=1, votes=1, expected=14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] lowest node id: 1 us: 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] highest node id: 14 us: 2 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [QUORUM] Members[14]: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [QUORUM] sending quorum notification to (nil), length = 104 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [VOTEQ ] Sending quorum callback, quorate = 1 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [MAIN ] Completed service synchronization, ready to provide service. 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [TOTEM ] waiting_trans_ack changed to 0 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got procjoin message from cluster node 1 (r(0) ip(10.3.94.89) ) for pid 16239 
Sep 15 17:41:09 m6kvm2 corosync[25411]: [CPG ] got procjoin message from cluster node 1 (r(0) ip(10.3.94.89) ) for pid 16239 

----- Original message ----- 
From: "aderumier" <aderumier at odiso.com> 
To: "Thomas Lamprecht" <t.lamprecht at proxmox.com> 
Cc: "Proxmox VE development discussion" <pve-devel at lists.proxmox.com> 
Sent: Tuesday, September 15, 2020 16:57:46 
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown 

>>I mean this is bad, but also great! 
>>Can you do a coredump of the whole thing and upload it somewhere with the version info 
>>used (for dbgsym package)? That could help a lot. 

I'll try to reproduce it again (with the full lock everywhere), and do the coredump. 




I have tried the real-time scheduling, 

but I was still able to reproduce the "lrm too long" messages for 60s (though as I'm restarting corosync each minute, I think 
something gets unlocked again at the next corosync restart.) 
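
(For completeness, the periodic restart is nothing fancier than a loop like the sketch below; this is only an 
illustration of the reproducer, not the exact script I run.) 

use strict; 
use warnings; 

# reproducer sketch: restart corosync on one node every minute 
while (1) { 
    system('systemctl', 'restart', 'corosync') == 0 
        or warn "corosync restart failed: exit code $?\n"; 
    sleep 60; 
} 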


this time it was blocked at the same time, on one node in: 

work { 
    ... 
    } elsif ($state eq 'active') { 
        ... 
        $self->update_lrm_status(); 

and on another node in: 

if ($fence_request) { 
    $haenv->log('err', "node need to be fenced - releasing agent_lock\n"); 
    $self->set_local_status({ state => 'lost_agent_lock'}); 
} elsif (!$self->get_protected_ha_agent_lock()) { 
    $self->set_local_status({ state => 'lost_agent_lock'}); 
} elsif ($self->{mode} eq 'maintenance') { 
    $self->set_local_status({ state => 'maintenance'}); 
} 
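
As far as I understand, both code paths end up doing I/O on the pmxcfs FUSE mount at /etc/pve 
(update_lrm_status() writes the lrm status file, get_protected_ha_agent_lock() takes a cluster-wide lock), 
so when pmxcfs is stuck the LRM blocks right there. A minimal standalone sketch of that failure mode 
(paths and timeout are only illustrative, this is not the pve-ha-manager code): 

use strict; 
use warnings; 

# try a single write on the FUSE mount and report if it hangs 
sub try_cfs_write { 
    my ($path, $data, $timeout) = @_; 
    my $ok = eval { 
        local $SIG{ALRM} = sub { die "timeout\n" }; 
        alarm($timeout); 
        open(my $fh, '>', $path) or die "open $path: $!\n"; 
        print $fh $data; 
        close($fh) or die "close $path: $!\n"; 
        alarm(0); 
        1; 
    }; 
    alarm(0); 
    warn "write to $path did not finish within ${timeout}s: $@" if !$ok; 
    return $ok; 
} 

# hypothetical file on the pmxcfs mount, only to exercise it 
try_cfs_write('/etc/pve/nodes/m6kvm2/lrm_status.tmp', "{}\n", 10); 

# note: if the writer ends up in uninterruptible sleep (D state), the alarm 
# cannot fire either, which is by itself a strong hint the mount is wedged 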





----- Original message ----- 
From: "Thomas Lamprecht" <t.lamprecht at proxmox.com> 
To: "aderumier" <aderumier at odiso.com> 
Cc: "Proxmox VE development discussion" <pve-devel at lists.proxmox.com> 
Sent: Tuesday, September 15, 2020 16:32:52 
Subject: Re: [pve-devel] corosync bug: cluster break after 1 node clean shutdown 

On 9/15/20 4:09 PM, Alexandre DERUMIER wrote: 
>>> Can you try to give pmxcfs real time scheduling, e.g., by doing: 
>>> 
>>> # systemctl edit pve-cluster 
>>> 
>>> And then add snippet: 
>>> 
>>> 
>>> [Service] 
>>> CPUSchedulingPolicy=rr 
>>> CPUSchedulingPriority=99 
> yes, sure, I'll do it now 
> 
> 
>> I'm currently digging the logs 
>>> Is your simplest/most stable reproducer still a periodic restart of corosync on one node? 
> yes, a simple "systemctl restart corosync" on 1 node each minute 
> 
> 
> 
> After 1hour, it's still locked. 
> 
> on other nodes, I still have pmxcfs logs like: 
> 

I mean this is bad, but also great! 
Can you do a coredump of the whole thing and upload it somewhere with the version info 
used (for dbgsym package)? That could help a lot. 


> manual "pmxcfs -d" 
> https://gist.github.com/aderumier/4cd91d17e1f8847b93ea5f621f257c2e 
> 

Hmm, the fuse connection of the previous one got into a weird state (or something is still 
running), but I'd rather say this is a side effect, not directly connected to the real bug. 

> 
> some interesting dmesg about "pvesr" 
> 
> [Tue Sep 15 14:45:34 2020] INFO: task pvesr:19038 blocked for more than 120 seconds. 
> [Tue Sep 15 14:45:34 2020] Tainted: P O 5.4.60-1-pve #1 
> [Tue Sep 15 14:45:34 2020] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. 
> [Tue Sep 15 14:45:34 2020] pvesr D 0 19038 1 0x00000080 
> [Tue Sep 15 14:45:34 2020] Call Trace: 
> [Tue Sep 15 14:45:34 2020] __schedule+0x2e6/0x6f0 
> [Tue Sep 15 14:45:34 2020] ? filename_parentat.isra.57.part.58+0xf7/0x180 
> [Tue Sep 15 14:45:34 2020] schedule+0x33/0xa0 
> [Tue Sep 15 14:45:34 2020] rwsem_down_write_slowpath+0x2ed/0x4a0 
> [Tue Sep 15 14:45:34 2020] down_write+0x3d/0x40 
> [Tue Sep 15 14:45:34 2020] filename_create+0x8e/0x180 
> [Tue Sep 15 14:45:34 2020] do_mkdirat+0x59/0x110 
> [Tue Sep 15 14:45:34 2020] __x64_sys_mkdir+0x1b/0x20 
> [Tue Sep 15 14:45:34 2020] do_syscall_64+0x57/0x190 
> [Tue Sep 15 14:45:34 2020] entry_SYSCALL_64_after_hwframe+0x44/0xa9 
> 

hmm, hangs in mkdir (cluster wide locking) 
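
That also matches what pvesr does here, as far as one can tell: it serializes itself cluster-wide by creating 
a lock directory on the pmxcfs mount, so the mkdir() becomes a request to pmxcfs and the task sits in D state 
when pmxcfs never answers. Rough illustration only (the lock path is hypothetical, this is not the actual 
PVE::Cluster code): 

use strict; 
use warnings; 

# mkdir-style cluster-wide lock attempt on the pmxcfs mount 
my $lockdir = '/etc/pve/priv/lock/example-lock';   # illustrative path only 

my $got_it = eval { 
    local $SIG{ALRM} = sub { die "mkdir timed out\n" }; 
    alarm(10); 
    mkdir($lockdir) or die "mkdir $lockdir: $!\n"; 
    alarm(0); 
    1; 
}; 
alarm(0); 
warn "cluster-wide lock attempt failed or hung: $@" if !$got_it; 

# as in the trace above, a task stuck inside the kernel (D state) will not 
# even be interrupted by the alarm; it just hangs until the mount recovers 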


_______________________________________________ 
pve-devel mailing list 
pve-devel at lists.proxmox.com 
https://lists.proxmox.com/cgi-bin/mailman/listinfo/pve-devel 







More information about the pve-devel mailing list