[pve-devel] [PATCH qemu-server v4 1/3] add qmeventd
Wolfgang Bumiller
w.bumiller at proxmox.com
Wed Nov 7 12:54:36 CET 2018
On Mon, Nov 05, 2018 at 01:59:59PM +0100, Dominik Csapak wrote:
> this adds a program that can listen to qemu qmp events on a given socket
> and if a shutdown event followed by a disconnected socket occurs,
> executes qm cleanup with arguments that indicate if the
> vm was closed gracefully and whether the guest initiated it
>
> this is useful if we want to cleanup after the qemu process exited,
> e.g. tap devices, vgpus, etc.
>
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
>
> diff --git a/Makefile b/Makefile
> index c531d04..9af6df6 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -36,6 +40,9 @@ all:
> dinstall: deb
> dpkg -i ${DEB}
>
> +qmeventd: qmeventd.c
> + $(CC) $(CFLAGS) ${JSON_CFLAGS} -o $@ $< ${JSON_LIBS}
^ Trailing space
> +
> qm.bash-completion:
> PVE_GENERATING_DOCS=1 perl -I. -T -e "use PVE::CLI::qm; PVE::CLI::qm->generate_bash_completions();" >$@.tmp
> mv $@.tmp $@
> diff --git a/debian/control b/debian/control
> index f3b9ca0..7fc27ed 100644
> --- a/debian/control
> +++ b/debian/control
> @@ -19,6 +21,7 @@ Depends: dbus,
> genisoimage,
> libc6 (>= 2.7-18),
> libio-multiplex-perl,
> + libjson-c3,
Shouldn't ${shlibs:Depends} already do this?
> libjson-perl,
> libjson-xs-perl,
> libnet-ssleay-perl,
> diff --git a/qmeventd.c b/qmeventd.c
> new file mode 100644
> index 0000000..9cfe5ff
> --- /dev/null
> +++ b/qmeventd.c
> @@ -0,0 +1,433 @@
> +/*
> +
> + Copyright (C) 2018 Proxmox Server Solutions GmbH
> +
> + Copyright: qemumonitor is under GNU GPL, the GNU General Public License.
> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; version 2 dated June, 1991.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program; if not, write to the Free Software
> + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
> + 02111-1307, USA.
> +
> + Author: Dominik Csapak <d.csapak at proxmox.com>
> +
> + qmevend listens on a given socket, and waits for qemu processes
> + to connect
> +
> + it then waits for shutdown events followed by the closing of the socket,
> + it then calls /usr/sbin/qm cleanup with following arguments
> +
> + /usr/sbin/qm cleanup VMID <graceful> <guest>
> +
> + parameter explanation:
> +
> + graceful:
> + 1|0 depending if it saw a shutdown event before the socket closed
> +
> + guest:
> + 1|0 depending if the shutdown was requested from the guest
> +
> +*/
> +
> +#ifndef _GNU_SOURCE
> +#define _GNU_SOURCE
> +#endif
> +
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <json.h>
> +#include <signal.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <sys/epoll.h>
> +#include <sys/socket.h>
> +#include <sys/types.h>
> +#include <sys/wait.h>
> +#include <sys/un.h>
> +#include <unistd.h>
> +
> +#include "qmeventd.h"
> +
> +static int verbose = 0;
> +static int epoll_fd = 0;
> +static const char *progname;
> +/*
> + * Helper functions
> + */
> +
> +static void
> +usage()
> +{
> + fprintf(stderr, "Usage: %s [-f] [-v] PATH\n", progname);
> + fprintf(stderr, " -f run in foreground (default: false)\n");
> + fprintf(stderr, " -v verbose (default: false)\n");
> + fprintf(stderr, " PATH use PATH for socket\n");
> +}
> +
> +static pid_t
> +get_pid_from_fd(int fd)
> +{
> + struct ucred credentials = { .pid = 0, .uid = 0, .gid = 0 };
> + socklen_t len = sizeof(struct ucred);
> + log_neg(getsockopt(fd, SOL_SOCKET, SO_PEERCRED, &credentials, &len), "getsockopt");
> + return credentials.pid;
> +}
> +
> +/*
> + * reads the vmid from /proc/<pid>/cmdline
> + * after the '-id' argument
> + */
> +static unsigned long
> +get_vmid_from_pid(pid_t pid)
> +{
> + char filename[32] = { 0 };
> + int len = snprintf(filename, sizeof(filename), "/proc/%d/cmdline", pid);
> + if (len < 0) {
> + fprintf(stderr, "error during snprintf for %d: %s\n", pid,
> + strerror(errno));
> + return 0;
> + }
> + if ((size_t)len >= sizeof(filename)) {
> + fprintf(stderr, "error: pid %d too long\n", pid);
> + return 0;
> + }
> + FILE *fp = fopen(filename, "re");
> + if (fp == NULL) {
> + fprintf(stderr, "error opening %s: %s\n", filename, strerror(errno));
> + return 0;
> + }
> +
> + unsigned long vmid = 0;
> + ssize_t rc = 0;
> + char *buf = NULL;
> + size_t buflen = 0;
> + while ((rc = getdelim(&buf, &buflen, '\0', fp)) >= 0) {
> + if (!strcmp(buf, "-id")) {
> + break;
> + }
> + }
> +
> + if (rc < 0) {
> + goto err;
> + }
> +
> + if (getdelim(&buf, &buflen, '\0', fp) >= 0) {
> + if (buf[0] == '-' || buf[0] == '\0') {
> + fprintf(stderr, "invalid vmid %s\n", buf);
> + goto ret;
> + }
> +
> + errno = 0;
> + char *endptr;
I'd prefer this NULL initialized. I don't trust that errors really
always update this to be a valid pointer...
> + vmid = strtoul(buf, &endptr, 10);
> + if (errno != 0) {
> + vmid = 0;
> + goto err;
> + } else if (*endptr != '\0') {
> + fprintf(stderr, "invalid vmid %s\n", buf);
> + vmid = 0;
> + }
> +
> + goto ret;
> + }
> +
> +err:
> + fprintf(stderr, "error parsing vmid for %d: %s\n", pid, strerror(errno));
> +
> +ret:
> + free(buf);
> + fclose(fp);
> + return vmid;
> +}
> +
> +static int must_write(int fd, const char *buf, size_t len)
int -> bool (via <stdbool.h>)
And newline before the function name
> +{
> + ssize_t wlen;
> + do {
> + wlen = write(fd, buf, len);
> + } while (wlen != (ssize_t)len && errno == EINTR);
Use wlen < 0; successful calls don't touch errno, so it's theoretically
possible for this loop to re-write already written data (or would be if
it was writing to a disk - it most likely won't happen here, but
still...).
> +
> + return (wlen == (ssize_t)len);
> +}
> +
> +static void reap_child()
Style issue - but I actually realized that it should be possible to just
set the signal to be _explicitly_ ignored (signal(SIGCLD, SIG_IGN)) to
not produce zombies (this is different to the implicit ignore default
as it is equivalent to calling sigaction(2) with SA_NOCLDWAIT (which
we could do explicitly instead of the old signal() call if we want, up
to you).
> +{
> + pid_t pid;
> + do {
> + pid = waitpid(-1, NULL, WNOHANG);
> + } while (pid > 0);
> +}
> +
> +/*
> + * qmp handling functions
> + */
> +
> +void
> +handle_qmp_handshake(struct Client *client)
> +{
> + VERBOSE_PRINT("%s: got QMP handshake\n", client->vmid);
> + const char *qmp_answer = "{\"execute\":\"qmp_capabilities\"}\n";
static const char[]
You should not use a pointer with sizeof(), I'm surprised this works
(assuming it did for you, I didn't test it ;-) )?
Perhaps qemu doesn't need the handshake? ;-)
> + if (!must_write(client->fd, qmp_answer, sizeof(qmp_answer) - 1)) {
> + fprintf(stderr, "%s: cannot complete handshake\n", client->vmid);
> + cleanup_client(client);
> + }
> +}
> +
> +void
> +handle_qmp_event(struct Client *client, struct json_object *obj)
> +{
> + struct json_object *event;
> + if (!json_object_object_get_ex(obj, "event", &event)) {
> + return;
> + }
> + VERBOSE_PRINT("%s: got QMP event: %s\n", client->vmid,
> + json_object_get_string(event));
> + // event, check if shutdown and get guest parameter
> + if (!strcmp(json_object_get_string(event), "SHUTDOWN")) {
> + client->graceful = 1;
> + struct json_object *data;
> + struct json_object *guest;
> + if (json_object_object_get_ex(obj, "data", &data) &&
> + json_object_object_get_ex(data, "guest", &guest))
> + {
> + client->guest = (unsigned short)json_object_get_boolean(guest);
> + }
> + }
> +}
> +
> +/*
> + * client management functions
> + */
> +
> +void
> +add_new_client(int client_fd)
> +{
> + struct Client *client = calloc(sizeof(struct Client), 1);
> + client->fd = client_fd;
> + client->pid = get_pid_from_fd(client_fd);
While falling through to get_vmid_from_pid(0) in an error case certainly
won't find an `-id` parameter in /proc/0/cmdline, or the file at all for
that matter, I'd like to have either a comment stating this, or
explicit error handling, rather than implicit snowballing ;-)
> + unsigned long vmid = get_vmid_from_pid(client->pid);
> + int res = snprintf(client->vmid, sizeof(client->vmid), "%lu", vmid);
> + if (vmid == 0 || res < 0 || res >= (int)sizeof(client->vmid)) {
> + fprintf(stderr, "could not get vmid from pid %d\n", client->pid);
> + goto err;
> + }
> +
> + struct epoll_event ev;
> + ev.events = EPOLLIN;
> + ev.data.ptr = client;
> + res = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, client_fd, &ev);
> + if (res < 0) {
> + perror("epoll_ctl client add");
> + goto err;
> + }
> +
> + VERBOSE_PRINT("added new client, pid: %d, vmid: %s\n", client->pid,
> + client->vmid);
> +
> + return;
> +err:
> + (void)close(client_fd);
> + free(client);
> +}
> +
> +void
> +cleanup_client(struct Client *client)
> +{
> + VERBOSE_PRINT("%s: client exited, status: graceful: %d, guest: %d\n",
> + client->vmid, client->graceful, client->guest);
> + log_neg(epoll_ctl(epoll_fd, EPOLL_CTL_DEL, client->fd, NULL), "epoll del");
> + (void)close(client->fd);
> +
> + unsigned short graceful = client->graceful;
> + unsigned short guest = client->guest;
> + char vmid[sizeof(client->vmid)];
> + strncpy(vmid, client->vmid, sizeof(vmid));
> + free(client);
> + VERBOSE_PRINT("%s: executing cleanup\n", vmid);
> +
> + int pid = fork();
> + if (pid < 0) {
> + fprintf(stderr, "fork failed: %s\n", strerror(errno));
> + return;
> + }
> + if (pid == 0) {
> + char *script = "/usr/sbin/qm";
> + char **args = malloc((size_t)(6) * sizeof(*args));
I suppose you could consider putting this on the stack
char *args[] = {
...,
NULL
};
> +
> + args[0] = script;
> + args[1] = "cleanup";
> + args[2] = vmid;
> +
> + char shutdown_args[4] = {
> + graceful ? '1' : '0', 0,
> + guest ? '1' : '0', 0,
> + };
> +
> + args[3] = &shutdown_args[0];
> + args[4] = &shutdown_args[2];
> + args[5] = NULL;
> +
> + execvp(script, args);
> + perror("execvp");
> + _exit(1);
> + }
> +}
> +
> +void
> +handle_client(struct Client *client)
> +{
> + VERBOSE_PRINT("%s: entering handle\n", client->vmid);
> + ssize_t len;
> + do {
> + len = read(client->fd, (client->buf+client->buflen),
> + sizeof(client->buf) - client->buflen);
> + } while (len < 0 && errno == EINTR);
> +
> + if (len < 0) {
> + if (!(errno == EAGAIN || errno == EWOULDBLOCK)) {
> + log_neg((int)len, "read");
> + cleanup_client(client);
> + }
> + return;
> + } else if (len == 0) {
> + VERBOSE_PRINT("%s: got EOF\n", client->vmid);
> + cleanup_client(client);
> + return;
> + }
> +
> + VERBOSE_PRINT("%s: read %ld bytes\n", client->vmid, len);
> + client->buflen += len;
> +
> + struct json_tokener *tok = json_tokener_new();
> + struct json_object *jobj = NULL;
> + enum json_tokener_error jerr = json_tokener_success;
> + while (jerr == json_tokener_success && client->buflen != 0) {
> + jobj = json_tokener_parse_ex(tok, client->buf, (int)client->buflen);
> + jerr = json_tokener_get_error(tok);
> + unsigned int offset = (unsigned int)tok->char_offset;
> + switch (jerr) {
> + case json_tokener_success:
> + // move rest from buffer to front
> + memmove(client->buf, client->buf + offset, client->buflen - offset);
> + client->buflen -= offset;
> + if (json_object_is_type(jobj, json_type_object)) {
> + struct json_object *obj;
> + if (json_object_object_get_ex(jobj, "QMP", &obj)) {
> + handle_qmp_handshake(client);
> + } else if (json_object_object_get_ex(jobj, "event", &obj)) {
> + handle_qmp_event(client, jobj);
> + } // else ignore message
> + }
> + break;
> + case json_tokener_continue:
> + if (client->buflen >= sizeof(client->buf)) {
> + VERBOSE_PRINT("%s, msg too large, discarding buffer\n",
> + client->vmid);
> + memset(client->buf, 0, sizeof(client->buf));
> + client->buflen = 0;
> + } // else we have enough space try again after next read
> + break;
> + default:
> + VERBOSE_PRINT("%s: parse error: %d, discarding buffer\n",
> + client->vmid, jerr);
> + memset(client->buf, 0, client->buflen);
> + client->buflen = 0;
> + break;
> + }
> + json_object_put(jobj);
> + }
> + json_tokener_free(tok);
> +}
> +
> +
> +int
> +main(int argc, char *argv[])
> +{
> + int opt;
> + int daemonize = 1;
> + char *socket_path = NULL;
> + progname = argv[0];
> +
> + while ((opt = getopt(argc, argv, "hfv")) != -1) {
> + switch (opt) {
> + case 'f':
> + daemonize = 0;
> + break;
> + case 'v':
> + verbose = 1;
> + break;
> + case 'h':
Something often overlooked:
$ ls --wrong &>/dev/null ; echo $?
2
$ ls --help &>/dev/null ; echo $?
0
When the user passes -h explicitly we should not exit with failure.
PS: strongly encouraged, but optional, is explicit handling of the long
version `--help` ;-)
PPS: And I don't mean like:
$ grep -h
Usage: grep [OPTION]... PATTERN [FILE]...
Try 'grep --help' for more information.
^~~~~~~~~~~~~~~~~
> + default:
> + usage();
> + exit(EXIT_FAILURE);
> + }
> + }
> +
> + if (optind >= argc) {
> + usage();
> + exit(EXIT_FAILURE);
> + }
> +
> + signal(SIGCHLD, reap_child);
> +
> + socket_path = argv[optind];
> +
> + int sock = socket(AF_UNIX, SOCK_STREAM, 0);
> + bail_neg(sock, "socket");
> +
> + struct sockaddr_un addr;
> + memset(&addr, 0, sizeof(addr));
> + addr.sun_family = AF_UNIX;
> + strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1);
> +
> + unlink(socket_path);
> + bail_neg(bind(sock, (struct sockaddr*)&addr, sizeof(addr)), "bind");
> +
> + struct epoll_event ev, events[1];
> + epoll_fd = epoll_create1(EPOLL_CLOEXEC);
> + bail_neg(epoll_fd, "epoll_create1");
> +
> + ev.events = EPOLLIN;
> + ev.data.fd = sock;
> + bail_neg(epoll_ctl(epoll_fd, EPOLL_CTL_ADD, sock, &ev), "epoll_ctl");
> +
> + bail_neg(listen(sock, 10), "listen");
> +
> + if (daemonize) {
> + bail_neg(daemon(0, 1), "daemon");
> + }
> +
> + int should_exit = 0;
> + int nevents, n;
I'd move 'n' down...
> +
> + while(!should_exit) {
I don't see should_exit written anywhere, so `for (;;)` ?
> + nevents = epoll_wait(epoll_fd, events, 1, -1);
> + if (nevents < 0 && errno == EINTR) {
> + // signal happened, try again
> + continue;
> + }
> + bail_neg(nevents, "epoll_wait");
> +
> + for (n = 0; n < nevents; n++) {
... for (int n ...
> + if (events[n].data.fd == sock) {
> +
> + int conn_sock = accept4(sock, NULL, NULL,
> + SOCK_NONBLOCK | SOCK_CLOEXEC);
> + log_neg(conn_sock, "accept");
On error this still falls through to add_new_client().
> +
> + add_new_client(conn_sock);
> + } else {
> + handle_client((struct Client *)events[n].data.ptr);
> + }
> + }
> + }
> +}
> diff --git a/qmeventd.h b/qmeventd.h
> new file mode 100644
> index 0000000..0c2ffc1
> --- /dev/null
> +++ b/qmeventd.h
> @@ -0,0 +1,55 @@
> +/*
> +
> + Copyright (C) 2018 Proxmox Server Solutions GmbH
> +
> + Copyright: qemumonitor is under GNU GPL, the GNU General Public License.
> +
> + This program is free software; you can redistribute it and/or modify
> + it under the terms of the GNU General Public License as published by
> + the Free Software Foundation; version 2 dated June, 1991.
> +
> + This program is distributed in the hope that it will be useful,
> + but WITHOUT ANY WARRANTY; without even the implied warranty of
> + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + GNU General Public License for more details.
> +
> + You should have received a copy of the GNU General Public License
> + along with this program; if not, write to the Free Software
> + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
> + 02111-1307, USA.
> +
> + Author: Dominik Csapak <d.csapak at proxmox.com>
> +*/
> +
> +#define VERBOSE_PRINT(...) do { if (verbose) { printf(__VA_ARGS__); } } while (0)
> +
> +static inline void log_neg(int errval, const char *msg)
> +{
> + if (errval < 0) {
> + perror(msg);
> + }
> +}
> +
> +static inline void bail_neg(int errval, const char *msg)
> +{
> + if (errval < 0) {
> + perror(msg);
> + exit(EXIT_FAILURE);
> + }
> +}
> +
> +struct Client {
> + char buf[4096];
> + char vmid[16];
> + int fd;
> + pid_t pid;
> + unsigned int buflen;
> + unsigned short graceful;
> + unsigned short guest;
> +};
> +
> +void handle_qmp_handshake(struct Client *client);
> +void handle_qmp_event(struct Client *client, struct json_object *obj);
> +void handle_client(struct Client *client);
> +void add_new_client(int client_fd);
> +void cleanup_client(struct Client *client);
> diff --git a/qmeventd.service b/qmeventd.service
> new file mode 100644
> index 0000000..a2462fc
> --- /dev/null
> +++ b/qmeventd.service
> @@ -0,0 +1,9 @@
> +[Unit]
> +Description=PVE Qemu Event Daemon
Should we explicitly order this Before=pve-guests.service? While it's not
strictly required to start guests, I think it would still make sense to
have the service available early.
> +
> +[Service]
> +ExecStart=/usr/sbin/qmeventd /var/run/qemu-server/event.socket
> +Type=forking
> +
> +[Install]
> +WantedBy=multi-user.target
> --
> 2.11.0
More information about the pve-devel
mailing list