[pve-devel] [PATCH qemu-server 1/7] qmeventd: add handling for -no-shutdown QEMU instances
Stefan Reiter
s.reiter at proxmox.com
Thu Sep 3 10:58:45 CEST 2020
We take care of killing QEMU processes when a guest shuts down manually.
QEMU will not exit itself, if started with -no-shutdown, but it will
still emit a "SHUTDOWN" event, which we await and then send SIGTERM.
This additionally allows us to handle backups in such situations. A
vzdump instance will connect to our socket and identify itself as such
in the handshake, sending along a VMID which will be marked as backing
up until the file handle is closed.
When a SHUTDOWN event is received while the VM is backing up, we do not
kill the VM. And when the vzdump handle is closed, we check if the
guest has started up since, and only if it's determined to still be
turned off, we then finally kill QEMU.
We cannot wait for QEMU directly to finish the backup (i.e. with
query-backup), as that would kill the VM too fast for vzdump to send the
last 'query-backup' to mark the backup as successful and done.
For handling 'query-status' messages sent to QEMU, a state-machine-esque
protocol is implemented into the Client struct (ClientState). This is
necessary, since QMP works asynchronously, and results arrive on the
same channel as events and even the handshake.
For referencing QEMU Clients from vzdump messages, they are kept in a
hash table. This requires linking against glib.
Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
---
debian/control | 1 +
qmeventd/Makefile | 4 +-
qmeventd/qmeventd.c | 277 ++++++++++++++++++++++++++++++++++++++------
qmeventd/qmeventd.h | 41 ++++++-
4 files changed, 281 insertions(+), 42 deletions(-)
diff --git a/debian/control b/debian/control
index 393badc..35c77da 100644
--- a/debian/control
+++ b/debian/control
@@ -20,6 +20,7 @@ Build-Depends: debhelper (>= 11~),
pve-edk2-firmware,
pve-firewall,
pve-qemu-kvm,
+ libglib2.0-dev,
Standards-Version: 4.3.0
Homepage: http://www.proxmox.com
diff --git a/qmeventd/Makefile b/qmeventd/Makefile
index a68818d..c2fdc6d 100644
--- a/qmeventd/Makefile
+++ b/qmeventd/Makefile
@@ -9,8 +9,8 @@ include /usr/share/pve-doc-generator/pve-doc-generator.mk
CC ?= gcc
CFLAGS += -O2 -Werror -Wall -Wextra -Wpedantic -Wtype-limits -Wl,-z,relro -std=gnu11
-CFLAGS += $(shell pkg-config --cflags json-c)
-LDFLAGS += $(shell pkg-config --libs json-c)
+CFLAGS += $(shell pkg-config --cflags json-c glib-2.0)
+LDFLAGS += $(shell pkg-config --libs json-c glib-2.0)
qmeventd: qmeventd.c
$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
diff --git a/qmeventd/qmeventd.c b/qmeventd/qmeventd.c
index 77c6297..5616ae4 100644
--- a/qmeventd/qmeventd.c
+++ b/qmeventd/qmeventd.c
@@ -55,12 +55,15 @@
#include <sys/un.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <gmodule.h>
#include "qmeventd.h"
static int verbose = 0;
static int epoll_fd = 0;
static const char *progname;
+GHashTable *vm_clients; // key=vmid (freed on remove), value=*Client (free manually)
+
/*
* Helper functions
*/
@@ -165,15 +168,39 @@ must_write(int fd, const char *buf, size_t len)
* qmp handling functions
*/
+static void
+send_qmp_cmd(struct Client *client, const char *buf, size_t len)
+{
+ if (!must_write(client->fd, buf, len - 1)) {
+ fprintf(stderr, "%s: cannot send QMP message\n", client->qemu.vmid);
+ cleanup_client(client);
+ }
+}
+
void
handle_qmp_handshake(struct Client *client)
{
- VERBOSE_PRINT("%s: got QMP handshake\n", client->vmid);
- static const char qmp_answer[] = "{\"execute\":\"qmp_capabilities\"}\n";
- if (!must_write(client->fd, qmp_answer, sizeof(qmp_answer) - 1)) {
- fprintf(stderr, "%s: cannot complete handshake\n", client->vmid);
+ VERBOSE_PRINT("pid%d: got QMP handshake, assuming QEMU client\n", client->pid);
+
+ // extract vmid from cmdline, now that we know it's a QEMU process
+ unsigned long vmid = get_vmid_from_pid(client->pid);
+ int res = snprintf(client->qemu.vmid, sizeof(client->qemu.vmid), "%lu", vmid);
+ if (vmid == 0 || res < 0 || res >= (int)sizeof(client->qemu.vmid)) {
+ fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
cleanup_client(client);
+ return;
}
+
+ VERBOSE_PRINT("pid%d: assigned VMID: %s\n", client->pid, client->qemu.vmid);
+ client->type = CLIENT_QEMU;
+ if(!g_hash_table_insert(vm_clients, strdup(client->qemu.vmid), client)) {
+ // not fatal, just means backup handling won't work
+ fprintf(stderr, "%s: could not insert client into VMID->client table\n",
+ client->qemu.vmid);
+ }
+
+ static const char qmp_answer[] = "{\"execute\":\"qmp_capabilities\"}\n";
+ send_qmp_cmd(client, qmp_answer, sizeof(qmp_answer));
}
void
@@ -183,18 +210,156 @@ handle_qmp_event(struct Client *client, struct json_object *obj)
if (!json_object_object_get_ex(obj, "event", &event)) {
return;
}
- VERBOSE_PRINT("%s: got QMP event: %s\n", client->vmid,
+ VERBOSE_PRINT("%s: got QMP event: %s\n", client->qemu.vmid,
json_object_get_string(event));
+
+ if (client->state == STATE_TERMINATING) {
+ // QEMU sometimes sends a second SHUTDOWN after SIGTERM, ignore
+ VERBOSE_PRINT("%s: event was after termination, ignoring\n",
+ client->qemu.vmid);
+ return;
+ }
+
// event, check if shutdown and get guest parameter
if (!strcmp(json_object_get_string(event), "SHUTDOWN")) {
- client->graceful = 1;
+ client->qemu.graceful = 1;
struct json_object *data;
struct json_object *guest;
if (json_object_object_get_ex(obj, "data", &data) &&
json_object_object_get_ex(data, "guest", &guest))
{
- client->guest = (unsigned short)json_object_get_boolean(guest);
+ client->qemu.guest = (unsigned short)json_object_get_boolean(guest);
}
+
+ // check if a backup is running and kill QEMU process if not
+ terminate_check(client);
+ }
+}
+
+void
+terminate_check(struct Client *client)
+{
+ if (client->state != STATE_IDLE) {
+ // if we're already in a request, queue this one until after
+ VERBOSE_PRINT("%s: terminate_check queued\n", client->qemu.vmid);
+ client->qemu.term_check_queued = true;
+ return;
+ }
+
+ client->qemu.term_check_queued = false;
+
+ VERBOSE_PRINT("%s: query-status\n", client->qemu.vmid);
+ client->state = STATE_EXPECT_STATUS_RESP;
+ static const char qmp_req[] = "{\"execute\":\"query-status\"}\n";
+ send_qmp_cmd(client, qmp_req, sizeof(qmp_req));
+}
+
+void
+handle_qmp_return(struct Client *client, struct json_object *data, bool error)
+{
+ if (error) {
+ const char *msg = "n/a";
+ struct json_object *desc;
+ if (json_object_object_get_ex(data, "desc", &desc)) {
+ msg = json_object_get_string(desc);
+ }
+ fprintf(stderr, "%s: received error from QMP: %s\n",
+ client->qemu.vmid, msg);
+ client->state = STATE_IDLE;
+ goto out;
+ }
+
+ struct json_object *status;
+ json_bool has_status = data &&
+ json_object_object_get_ex(data, "status", &status);
+
+ bool active = false;
+ if (has_status) {
+ const char *status_str = json_object_get_string(status);
+ active = status_str &&
+ (!strcmp(status_str, "running") || !strcmp(status_str, "paused"));
+ }
+
+ switch (client->state) {
+ case STATE_EXPECT_STATUS_RESP:
+ client->state = STATE_IDLE;
+ if (active) {
+ VERBOSE_PRINT("%s: got status: VM is active\n", client->qemu.vmid);
+ } else if (!client->qemu.backup) {
+ terminate_client(client);
+ } else {
+ // if we're in a backup, don't do anything, vzdump will notify
+ // us when the backup finishes
+ VERBOSE_PRINT("%s: not active, but running backup - keep alive\n",
+ client->qemu.vmid);
+ }
+ break;
+
+ // this means we received the empty return from our handshake answer
+ case STATE_HANDSHAKE:
+ client->state = STATE_IDLE;
+ VERBOSE_PRINT("%s: QMP handshake complete\n", client->qemu.vmid);
+ break;
+
+ case STATE_IDLE:
+ case STATE_TERMINATING:
+ VERBOSE_PRINT("%s: spurious return value received\n",
+ client->qemu.vmid);
+ break;
+ }
+
+out:
+ if (client->qemu.term_check_queued) {
+ terminate_check(client);
+ }
+}
+
+/*
+ * VZDump specific client functions
+ */
+
+void
+handle_vzdump_handshake(struct Client *client, struct json_object *data)
+{
+ client->state = STATE_IDLE;
+
+ struct json_object *vmid_obj;
+ json_bool has_vmid = data && json_object_object_get_ex(data, "vmid", &vmid_obj);
+
+ if (!has_vmid) {
+ VERBOSE_PRINT("pid%d: invalid vzdump handshake: no vmid\n",
+ client->pid);
+ return;
+ }
+
+ const char *vmid_str = json_object_get_string(vmid_obj);
+
+ if (!vmid_str) {
+ VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid is not a string\n",
+ client->pid);
+ return;
+ }
+
+ int res = snprintf(client->vzdump.vmid, sizeof(client->vzdump.vmid), "%s", vmid_str);
+ if (res < 0 || res >= (int)sizeof(client->vzdump.vmid)) {
+ VERBOSE_PRINT("pid%d: invalid vzdump handshake: vmid too long or invalid\n",
+ client->pid);
+ return;
+ }
+
+ struct Client *vmc =
+ (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
+ if (vmc) {
+ vmc->qemu.backup = true;
+
+ // only mark as VZDUMP once we have set everything up, otherwise 'cleanup'
+ // might try to access an invalid value
+ client->type = CLIENT_VZDUMP;
+ VERBOSE_PRINT("%s: vzdump backup started\n",
+ client->vzdump.vmid);
+ } else {
+ VERBOSE_PRINT("%s: vzdump requested backup start for unregistered VM\n",
+ client->vzdump.vmid);
}
}
@@ -206,30 +371,25 @@ void
add_new_client(int client_fd)
{
struct Client *client = calloc(sizeof(struct Client), 1);
+ client->state = STATE_HANDSHAKE;
+ client->state = CLIENT_NONE;
client->fd = client_fd;
client->pid = get_pid_from_fd(client_fd);
if (client->pid == 0) {
fprintf(stderr, "could not get pid from client\n");
goto err;
}
- unsigned long vmid = get_vmid_from_pid(client->pid);
- int res = snprintf(client->vmid, sizeof(client->vmid), "%lu", vmid);
- if (vmid == 0 || res < 0 || res >= (int)sizeof(client->vmid)) {
- fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
- goto err;
- }
struct epoll_event ev;
ev.events = EPOLLIN;
ev.data.ptr = client;
- res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
+ int res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
if (res < 0) {
perror("epoll_ctl client add");
goto err;
}
- VERBOSE_PRINT("added new client, pid: %d, vmid: %s\n", client->pid,
- client->vmid);
+ VERBOSE_PRINT("added new client, pid: %d\n", client->pid);
return;
err:
@@ -237,20 +397,16 @@ err:
free(client);
}
-void
-cleanup_client(struct Client *client)
+static void
+cleanup_qemu_client(struct Client *client)
{
- VERBOSE_PRINT("%s: client exited, status: graceful: %d, guest: %d\n",
- client->vmid, client->graceful, client->guest);
- log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
- (void)close(client->fd);
-
- unsigned short graceful = client->graceful;
- unsigned short guest = client->guest;
- char vmid[sizeof(client->vmid)];
- strncpy(vmid, client->vmid, sizeof(vmid));
- free(client);
- VERBOSE_PRINT("%s: executing cleanup\n", vmid);
+ unsigned short graceful = client->qemu.graceful;
+ unsigned short guest = client->qemu.guest;
+ char vmid[sizeof(client->qemu.vmid)];
+ strncpy(vmid, client->qemu.vmid, sizeof(vmid));
+ g_hash_table_remove(vm_clients, &vmid); // frees key, ignore errors
+ VERBOSE_PRINT("%s: executing cleanup (graceful: %d, guest: %d)\n",
+ vmid, graceful, guest);
int pid = fork();
if (pid < 0) {
@@ -275,10 +431,51 @@ cleanup_client(struct Client *client)
}
}
+void
+cleanup_client(struct Client *client)
+{
+ log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
+ (void)close(client->fd);
+
+ struct Client *vmc;
+ switch (client->type) {
+ case CLIENT_QEMU:
+ cleanup_qemu_client(client);
+ break;
+
+ case CLIENT_VZDUMP:
+ vmc = (struct Client*) g_hash_table_lookup(vm_clients, client->vzdump.vmid);
+ if (vmc) {
+ VERBOSE_PRINT("%s: backup ended\n", client->vzdump.vmid);
+ vmc->qemu.backup = false;
+ terminate_check(vmc);
+ }
+ break;
+
+ case CLIENT_NONE:
+ // do nothing, only close socket
+ break;
+ }
+
+ free(client);
+}
+
+void
+terminate_client(struct Client *client)
+{
+ VERBOSE_PRINT("%s: terminating client (pid %d)\n",
+ client->qemu.vmid, client->pid);
+
+ client->state = STATE_TERMINATING;
+
+ int err = kill(client->pid, SIGTERM);
+ log_neg(err, "kill");
+}
+
void
handle_client(struct Client *client)
{
- VERBOSE_PRINT("%s: entering handle\n", client->vmid);
+ VERBOSE_PRINT("pid%d: entering handle\n", client->pid);
ssize_t len;
do {
len = read(client->fd, (client->buf+client->buflen),
@@ -292,12 +489,12 @@ handle_client(struct Client *client)
}
return;
} else if (len == 0) {
- VERBOSE_PRINT("%s: got EOF\n", client->vmid);
+ VERBOSE_PRINT("pid%d: got EOF\n", client->pid);
cleanup_client(client);
return;
}
- VERBOSE_PRINT("%s: read %ld bytes\n", client->vmid, len);
+ VERBOSE_PRINT("pid%d: read %ld bytes\n", client->pid, len);
client->buflen += len;
struct json_tokener *tok = json_tokener_new();
@@ -318,20 +515,26 @@ handle_client(struct Client *client)
handle_qmp_handshake(client);
} else if (json_object_object_get_ex(jobj, "event", &obj)) {
handle_qmp_event(client, jobj);
+ } else if (json_object_object_get_ex(jobj, "return", &obj)) {
+ handle_qmp_return(client, obj, false);
+ } else if (json_object_object_get_ex(jobj, "error", &obj)) {
+ handle_qmp_return(client, obj, true);
+ } else if (json_object_object_get_ex(jobj, "vzdump", &obj)) {
+ handle_vzdump_handshake(client, obj);
} // else ignore message
}
break;
case json_tokener_continue:
if (client->buflen >= sizeof(client->buf)) {
- VERBOSE_PRINT("%s, msg too large, discarding buffer\n",
- client->vmid);
+ VERBOSE_PRINT("pid%d: msg too large, discarding buffer\n",
+ client->pid);
memset(client->buf, 0, sizeof(client->buf));
client->buflen = 0;
} // else we have enough space try again after next read
break;
default:
- VERBOSE_PRINT("%s: parse error: %d, discarding buffer\n",
- client->vmid, jerr);
+ VERBOSE_PRINT("pid%d: parse error: %d, discarding buffer\n",
+ client->pid, jerr);
memset(client->buf, 0, client->buflen);
client->buflen = 0;
break;
@@ -402,6 +605,8 @@ main(int argc, char *argv[])
bail_neg(daemon(0, 1), "daemon");
}
+ vm_clients = g_hash_table_new_full(g_str_hash, g_str_equal, free, NULL);
+
int nevents;
for(;;) {
diff --git a/qmeventd/qmeventd.h b/qmeventd/qmeventd.h
index 0c2ffc1..30aea98 100644
--- a/qmeventd/qmeventd.h
+++ b/qmeventd/qmeventd.h
@@ -38,18 +38,51 @@ static inline void bail_neg(int errval, const char *msg)
}
}
+typedef enum {
+ CLIENT_NONE,
+ CLIENT_QEMU,
+ CLIENT_VZDUMP
+} ClientType;
+
+typedef enum {
+ STATE_HANDSHAKE,
+ STATE_IDLE,
+ STATE_EXPECT_STATUS_RESP,
+ STATE_TERMINATING
+} ClientState;
+
struct Client {
char buf[4096];
- char vmid[16];
+ unsigned int buflen;
+
int fd;
pid_t pid;
- unsigned int buflen;
- unsigned short graceful;
- unsigned short guest;
+
+ ClientType type;
+ ClientState state;
+
+ // only relevant for type=CLIENT_QEMU
+ struct {
+ char vmid[16];
+ unsigned short graceful;
+ unsigned short guest;
+ bool term_check_queued;
+ bool backup;
+ } qemu;
+
+ // only relevant for type=CLIENT_VZDUMP
+ struct {
+ // vmid of referenced backup
+ char vmid[16];
+ } vzdump;
};
void handle_qmp_handshake(struct Client *client);
void handle_qmp_event(struct Client *client, struct json_object *obj);
+void handle_qmp_return(struct Client *client, struct json_object *data, bool error);
+void handle_vzdump_handshake(struct Client *client, struct json_object *data);
void handle_client(struct Client *client);
void add_new_client(int client_fd);
void cleanup_client(struct Client *client);
+void terminate_client(struct Client *client);
+void terminate_check(struct Client *client);
--
2.20.1
More information about the pve-devel
mailing list