5.15.64-1-pve kernel breaks scheduler? unable to ssh, start emu, or even reboot

Dawid Wrobel me at dawidwrobel.com
Tue Nov 8 14:29:53 CET 2022


A regular update today rendered my node completely broken. I could not ssh
in, although I could telnet to port 22 just fine, implying that the main
sshd process was running but the spawned subprocesses were stalling. I
could not reboot either; those processes stalled as well. The VMs also
wouldn't start, although the containers did. I could reach the web
interface over http, and opening the console from there resulted in a
successful ssh login according to /var/log/messages, all of which could
also point to some permission issue.

Looking at the dmesg/syslog, processes simply hung indefinitely:

Code:
[  242.862135] INFO: task kworker/8:1:165 blocked for more than 120 seconds.
[  242.862156]       Tainted: P           O      5.15.64-1-pve #1
[  242.862169] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
this message.
[  242.862185] task:kworker/8:1     state:D stack:    0 pid:  165 ppid:
2 flags:0x00004000
[  242.862205] Workqueue: ipv6_addrconf addrconf_dad_work
[  242.862220] Call Trace:
[  242.862227]  <TASK>
[  242.862233]  __schedule+0x34e/0x1740
[  242.862244]  ? update_load_avg+0x82/0x640
[  242.862256]  schedule+0x69/0x110
[  242.862264]  schedule_preempt_disabled+0xe/0x20
[  242.862275]  __mutex_lock.constprop.0+0x255/0x480
[  242.862287]  __mutex_lock_slowpath+0x13/0x20
[  242.862609]  mutex_lock+0x38/0x50
[  242.862932]  rtnl_lock+0x15/0x20
[  242.863248]  addrconf_dad_work+0x39/0x4d0
[  242.863557]  process_one_work+0x228/0x3d0
[  242.863861]  worker_thread+0x53/0x420
[  242.864160]  ? process_one_work+0x3d0/0x3d0
[  242.864457]  kthread+0x127/0x150
[  242.864756]  ? set_kthread_struct+0x50/0x50
[  242.865063]  ret_from_fork+0x1f/0x30
[  242.865361]  </TASK>
[  242.865657] INFO: task kworker/9:3:984 blocked for more than 120 seconds.
[  242.865954]       Tainted: P           O      5.15.64-1-pve #1
[  242.866268] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
this message.
[  242.866549] task:kworker/9:3     state:D stack:    0 pid:  984 ppid:
2 flags:0x00004000
[  242.866831] Workqueue: events switchdev_deferred_process_work
[  242.867119] Call Trace:
[  242.867401]  <TASK>
[  242.867681]  __schedule+0x34e/0x1740
[  242.867969]  ? dequeue_entity+0xd8/0x490
[  242.868253]  schedule+0x69/0x110
[  242.868525]  schedule_preempt_disabled+0xe/0x20
[  242.868792]  __mutex_lock.constprop.0+0x255/0x480
[  242.869056]  ? add_timer_on+0x115/0x180
[  242.869312]  __mutex_lock_slowpath+0x13/0x20
[  242.869564]  mutex_lock+0x38/0x50
[  242.869839]  rtnl_lock+0x15/0x20
[  242.870105]  switchdev_deferred_process_work+0xe/0x20
[  242.870339]  process_one_work+0x228/0x3d0
[  242.870567]  worker_thread+0x53/0x420
[  242.870792]  ? process_one_work+0x3d0/0x3d0
[  242.871021]  kthread+0x127/0x150
[  242.871248]  ? set_kthread_struct+0x50/0x50
[  242.871478]  ret_from_fork+0x1f/0x30
[  242.871707]  </TASK>
[  242.871940] INFO: task task UPID:proxm:1587 blocked for more than 120
seconds.
[  242.872180]       Tainted: P           O      5.15.64-1-pve #1
[  242.872423] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
this message.
[  242.872673] task:task UPID:proxm state:D stack:    0 pid: 1587 ppid:
 1586 flags:0x00004000
[  242.872937] Call Trace:
[  242.873195]  <TASK>
[  242.873450]  __schedule+0x34e/0x1740
[  242.873706]  ? asm_sysvec_apic_timer_interrupt+0x1b/0x20
[  242.873970]  schedule+0x69/0x110
[  242.874260]  schedule_preempt_disabled+0xe/0x20
[  242.874524]  __mutex_lock.constprop.0+0x255/0x480
[  242.874789]  __mutex_lock_slowpath+0x13/0x20
[  242.875054]  mutex_lock+0x38/0x50
[  242.875310]  rtnl_lock+0x15/0x20
[  242.875559]  unregister_netdev+0x13/0x30
[  242.875804]  igbvf_remove+0x50/0x100 [igbvf]
[  242.876042]  pci_device_remove+0x3b/0xb0
[  242.876273]  __device_release_driver+0x1a8/0x2a0
[  242.876503]  device_release_driver+0x29/0x40
[  242.876737]  pci_stop_bus_device+0x74/0xa0
[  242.876976]  pci_stop_and_remove_bus_device+0x13/0x30
[  242.877213]  pci_iov_remove_virtfn+0xc5/0x130
[  242.877452]  sriov_disable+0x3a/0xf0
[  242.877687]  pci_disable_sriov+0x26/0x30
[  242.877956]  igb_disable_sriov+0x64/0x110 [igb]
[  242.878223]  igb_remove+0xca/0x210 [igb]
[  242.878456]  pci_device_remove+0x3b/0xb0
[  242.878686]  __device_release_driver+0x1a8/0x2a0
[  242.878919]  device_driver_detach+0x56/0xe0
[  242.879151]  unbind_store+0x12a/0x140
[  242.879384]  drv_attr_store+0x21/0x40
[  242.879614]  sysfs_kf_write+0x3c/0x50
[  242.879844]  kernfs_fop_write_iter+0x13c/0x1d0
[  242.880072]  new_sync_write+0x111/0x1b0
[  242.880300]  vfs_write+0x1d9/0x270
[  242.880529]  ksys_write+0x67/0xf0
[  242.880757]  __x64_sys_write+0x1a/0x20
[  242.880980]  do_syscall_64+0x59/0xc0
[  242.881197]  ? __x64_sys_newfstat+0x16/0x20
[  242.881406]  ? do_syscall_64+0x69/0xc0
[  242.881608]  ? do_syscall_64+0x69/0xc0
[  242.881833]  ? exc_page_fault+0x89/0x170
[  242.882057]  entry_SYSCALL_64_after_hwframe+0x61/0xcb
[  242.882256] RIP: 0033:0x7fdec53defb3
[  242.882451] RSP: 002b:00007ffd1e77f198 EFLAGS: 00000246 ORIG_RAX:
0000000000000001
[  242.882657] RAX: ffffffffffffffda RBX: 000055b45d966c30 RCX:
00007fdec53defb3
[  242.882866] RDX: 000000000000000c RSI: 000055b45d966c30 RDI:
000000000000000e
[  242.883075] RBP: 000000000000000c R08: 0000000000000000 R09:
000055b4561343b0
[  242.883287] R10: 000055b45d955b38 R11: 0000000000000246 R12:
000055b45d965910
[  242.883499] R13: 000055b4570b72a0 R14: 000000000000000e R15:
000055b45d965910
[  242.883710]  </TASK>
[  242.883916] INFO: task sshd:1751 blocked for more than 120 seconds.
[  242.884132]       Tainted: P           O      5.15.64-1-pve #1
[  242.884353] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
this message.
[  242.884581] task:sshd            state:D stack:    0 pid: 1751 ppid:
 1348 flags:0x00000000
[  242.884820] Call Trace:
[  242.885055]  <TASK>
[  242.885282]  __schedule+0x34e/0x1740
[  242.885511]  ? kmem_cache_free+0x24d/0x290
[  242.885743]  ? spl_kmem_cache_free+0x145/0x200 [spl]
[  242.886051]  schedule+0x69/0x110
[  242.886286]  schedule_preempt_disabled+0xe/0x20
[  242.886521]  __mutex_lock.constprop.0+0x255/0x480
[  242.886759]  ? rtnl_create_link+0x330/0x330
[  242.886994]  __mutex_lock_slowpath+0x13/0x20
[  242.887232]  mutex_lock+0x38/0x50
[  242.887465]  __netlink_dump_start+0xc7/0x300
[  242.887704]  ? rtnl_create_link+0x330/0x330
[  242.887942]  rtnetlink_rcv_msg+0x2b8/0x410
[  242.888179]  ? kernel_init_free_pages.part.0+0x4a/0x70
[  242.888422]  ? rtnl_create_link+0x330/0x330
[  242.888663]  ? rtnl_calcit.isra.0+0x130/0x130
[  242.888904]  netlink_rcv_skb+0x53/0x100
[  242.889146]  rtnetlink_rcv+0x15/0x20
[  242.889391]  netlink_unicast+0x224/0x340
[  242.889633]  netlink_sendmsg+0x23e/0x4a0
[  242.889875]  sock_sendmsg+0x66/0x70
[  242.890181]  __sys_sendto+0x113/0x190
[  242.890424]  ? syscall_exit_to_user_mode+0x27/0x50
[  242.890668]  ? __x64_sys_bind+0x1a/0x30
[  242.890913]  ? do_syscall_64+0x69/0xc0
[  242.891150]  ? exit_to_user_mode_prepare+0x37/0x1b0
[  242.891384]  __x64_sys_sendto+0x29/0x40
[  242.891615]  do_syscall_64+0x59/0xc0
[  242.891850]  entry_SYSCALL_64_after_hwframe+0x61/0xcb
[  242.892088] RIP: 0033:0x7f98367dafa6
[  242.892322] RSP: 002b:00007ffc18ce7808 EFLAGS: 00000246 ORIG_RAX:
000000000000002c
[  242.892567] RAX: ffffffffffffffda RBX: 00007ffc18ce8900 RCX:
00007f98367dafa6
[  242.892812] RDX: 0000000000000014 RSI: 00007ffc18ce8900 RDI:
0000000000000004
[  242.893055] RBP: 00007ffc18ce8950 R08: 00007ffc18ce88a4 R09:
000000000000000c
[  242.893290] R10: 0000000000000000 R11: 0000000000000246 R12:
00007ffc18ce88a4
[  242.893520] R13: 00000000000006d7 R14: 0000000000000004 R15:
00007ffc18ce8b60
[  242.893761]  </TASK>
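For what it's worth, every one of these traces ends up waiting in
rtnl_lock(), so a single holder of the RTNL mutex (by the look of it, the
igb/igbvf SR-IOV teardown path) is enough to wedge all of them. A quick way
I know of to confirm this kind of pile-up is to dump all blocked tasks via
sysrq; a minimal sketch (needs root, and assumes sysrq isn't disabled on
the node):

```shell
# Dump stacks of all uninterruptible (D-state) tasks to the kernel log;
# every victim of an rtnl_lock pile-up shows up there at once.
echo 1 > /proc/sys/kernel/sysrq   # enable sysrq if it isn't already
echo w > /proc/sysrq-trigger      # 'w' = show blocked (D-state) tasks
dmesg | tail -n 100               # traces land in the kernel ring buffer
```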

Pinning the previous kernel, 5.15.60-2-pve, brought everything back to
normal.
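In case it helps anyone else hitting this: on a node that boots via
proxmox-boot-tool, pinning the known-good kernel looks roughly like this
(assuming a pve-kernel-helper recent enough to have the pin subcommand;
the version string is the one from my node):

```shell
# Pin the known-good kernel so subsequent reboots keep using it
proxmox-boot-tool kernel pin 5.15.60-2-pve
proxmox-boot-tool kernel list   # verify the pin took effect
# Once a fixed kernel lands, drop the pin again:
# proxmox-boot-tool kernel unpin
```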

P.S. This also happened:

USAGE: qm stop <vmid> [OPTIONS]

  Stop virtual machine. The qemu process will exit immediately. This is akin
  to pulling the power plug of a running computer and may damage the VM
  data

(...)
  -skiplock  <boolean>
             Ignore locks - only root is allowed to use this option.
(...)

root at proxmox:~# qm stop 112 -skiplock
trying to acquire lock...
can't lock file '/var/lock/qemu-server/lock-112.conf' - got timeout
root at proxmox:~# qm stop 112 -skiplock true
trying to acquire lock...
can't lock file '/var/lock/qemu-server/lock-112.conf' - received interrupt
root at proxmox:~# qm stop 112 -skiplock 1
trying to acquire lock...
can't lock file '/var/lock/qemu-server/lock-112.conf' - got timeout

How is one supposed to use the skiplock option?
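If I understand it right (an assumption on my part, the docs don't spell it
out), -skiplock only bypasses the lock: property stored in the VM config;
the flock on /var/lock/qemu-server/lock-<vmid>.conf is always taken to
serialize config access, and it's that flock, presumably held by one of the
hung workers above, that times out here. Something along these lines shows
who is sitting on it (VMID 112 is just my case):

```shell
# Identify the process holding the per-VM config flock;
# fuser comes from the psmisc package.
fuser -v /var/lock/qemu-server/lock-112.conf
# Last resort: bypass qm entirely and signal the QEMU process directly,
# using the pidfile qemu-server maintains for each running VM.
# kill "$(cat /var/run/qemu-server/112.pid)"
```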

-- 
Best Regards,
Dawid Wrobel

