[pve-devel] [PATCH 17/31] PVE: internal snapshot async

Dietmar Maurer dietmar at proxmox.com
Fri Mar 6 12:29:57 CET 2020


Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
Signed-off-by: Dietmar Maurer <dietmar at proxmox.com>
---
 Makefile.objs                |   1 +
 hmp-commands-info.hx         |  13 +
 hmp-commands.hx              |  32 +++
 include/migration/snapshot.h |   1 +
 include/monitor/hmp.h        |   5 +
 monitor/hmp-cmds.c           |  57 +++++
 qapi/migration.json          |  34 +++
 qapi/misc.json               |  32 +++
 qemu-options.hx              |  13 +
 savevm-async.c               | 463 +++++++++++++++++++++++++++++++++++
 vl.c                         |  10 +
 11 files changed, 661 insertions(+)
 create mode 100644 savevm-async.c

diff --git a/Makefile.objs b/Makefile.objs
index 11ba1a36bd..f97b40f232 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -48,6 +48,7 @@ common-obj-y += bootdevice.o iothread.o
 common-obj-y += dump/
 common-obj-y += job-qmp.o
 common-obj-y += monitor/
+common-obj-y += savevm-async.o
 common-obj-y += net/
 common-obj-y += qdev-monitor.o device-hotplug.o
 common-obj-$(CONFIG_WIN32) += os-win32.o
diff --git a/hmp-commands-info.hx b/hmp-commands-info.hx
index 257ee7d7a3..139e673bea 100644
--- a/hmp-commands-info.hx
+++ b/hmp-commands-info.hx
@@ -608,6 +608,19 @@ STEXI
 @item info migrate_cache_size
 @findex info migrate_cache_size
 Show current migration xbzrle cache size.
+ETEXI
+
+    {
+        .name       = "savevm",
+        .args_type  = "",
+        .params     = "",
+        .help       = "show savevm status",
+        .cmd = hmp_info_savevm,
+    },
+
+STEXI
+ at item info savevm
+show savevm status
 ETEXI
 
     {
diff --git a/hmp-commands.hx b/hmp-commands.hx
index cfcc044ce4..104288322d 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1945,3 +1945,35 @@ ETEXI
 STEXI
 @end table
 ETEXI
+
+    {
+        .name       = "savevm-start",
+        .args_type  = "statefile:s?",
+        .params     = "[statefile]",
+        .help       = "Prepare for snapshot and halt VM. Save VM state to statefile.",
+        .cmd = hmp_savevm_start,
+    },
+
+    {
+        .name       = "snapshot-drive",
+        .args_type  = "device:s,name:s",
+        .params     = "device name",
+        .help       = "Create internal snapshot.",
+        .cmd = hmp_snapshot_drive,
+    },
+
+    {
+        .name       = "delete-drive-snapshot",
+        .args_type  = "device:s,name:s",
+        .params     = "device name",
+        .help       = "Delete internal snapshot.",
+        .cmd = hmp_delete_drive_snapshot,
+    },
+
+    {
+        .name       = "savevm-end",
+        .args_type  = "",
+        .params     = "",
+        .help       = "Resume VM after snaphot.",
+        .cmd = hmp_savevm_end,
+    },
diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h
index c85b6ec75b..4411b7121d 100644
--- a/include/migration/snapshot.h
+++ b/include/migration/snapshot.h
@@ -17,5 +17,6 @@
 
 int save_snapshot(const char *name, Error **errp);
 int load_snapshot(const char *name, Error **errp);
+int load_snapshot_from_blockdev(const char *filename, Error **errp);
 
 #endif
diff --git a/include/monitor/hmp.h b/include/monitor/hmp.h
index a0e9511440..c6ee8295f0 100644
--- a/include/monitor/hmp.h
+++ b/include/monitor/hmp.h
@@ -25,6 +25,7 @@ void hmp_info_status(Monitor *mon, const QDict *qdict);
 void hmp_info_uuid(Monitor *mon, const QDict *qdict);
 void hmp_info_chardev(Monitor *mon, const QDict *qdict);
 void hmp_info_mice(Monitor *mon, const QDict *qdict);
+void hmp_info_savevm(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_capabilities(Monitor *mon, const QDict *qdict);
 void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict);
@@ -102,6 +103,10 @@ void hmp_netdev_add(Monitor *mon, const QDict *qdict);
 void hmp_netdev_del(Monitor *mon, const QDict *qdict);
 void hmp_getfd(Monitor *mon, const QDict *qdict);
 void hmp_closefd(Monitor *mon, const QDict *qdict);
+void hmp_savevm_start(Monitor *mon, const QDict *qdict);
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict);
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict);
+void hmp_savevm_end(Monitor *mon, const QDict *qdict);
 void hmp_sendkey(Monitor *mon, const QDict *qdict);
 void hmp_screendump(Monitor *mon, const QDict *qdict);
 void hmp_nbd_server_start(Monitor *mon, const QDict *qdict);
diff --git a/monitor/hmp-cmds.c b/monitor/hmp-cmds.c
index 2e725ed818..90aa34be25 100644
--- a/monitor/hmp-cmds.c
+++ b/monitor/hmp-cmds.c
@@ -2607,6 +2607,63 @@ void hmp_info_memory_devices(Monitor *mon, const QDict *qdict)
     hmp_handle_error(mon, &err);
 }
 
+void hmp_savevm_start(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *statefile = qdict_get_try_str(qdict, "statefile");
+
+    qmp_savevm_start(statefile != NULL, statefile, &errp);
+    hmp_handle_error(mon, &errp);
+}
+
+void hmp_snapshot_drive(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *name = qdict_get_str(qdict, "name");
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_snapshot_drive(device, name, &errp);
+    hmp_handle_error(mon, &errp);
+}
+
+void hmp_delete_drive_snapshot(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+    const char *name = qdict_get_str(qdict, "name");
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_delete_drive_snapshot(device, name, &errp);
+    hmp_handle_error(mon, &errp);
+}
+
+void hmp_savevm_end(Monitor *mon, const QDict *qdict)
+{
+    Error *errp = NULL;
+
+    qmp_savevm_end(&errp);
+    hmp_handle_error(mon, &errp);
+}
+
+void hmp_info_savevm(Monitor *mon, const QDict *qdict)
+{
+    SaveVMInfo *info;
+    info = qmp_query_savevm(NULL);
+
+    if (info->has_status) {
+        monitor_printf(mon, "savevm status: %s\n", info->status);
+        monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
+                       info->total_time);
+    } else {
+        monitor_printf(mon, "savevm status: not running\n");
+    }
+    if (info->has_bytes) {
+        monitor_printf(mon, "Bytes saved: %"PRIu64"\n", info->bytes);
+    }
+    if (info->has_error) {
+        monitor_printf(mon, "Error: %s\n", info->error);
+    }
+}
+
 void hmp_info_iothreads(Monitor *mon, const QDict *qdict)
 {
     IOThreadInfoList *info_list = qmp_query_iothreads(NULL);
diff --git a/qapi/migration.json b/qapi/migration.json
index b7348d0c8b..2792409977 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -222,6 +222,40 @@
            '*compression': 'CompressionStats',
            '*socket-address': ['SocketAddress'] } }
 
+##
+# @SaveVMInfo:
+#
+# Information about current migration process.
+#
+# @status: string describing the current savevm status.
+#          This can be 'active', 'completed', 'failed'.
+#          If this field is not returned, no savevm process
+#          has been initiated
+#
+# @error: string containing error message is status is failed.
+#
+# @total-time: total amount of milliseconds since savevm started.
+#        If savevm has ended, it returns the total save time
+#
+# @bytes: total amount of data transfered
+#
+# Since: 1.3
+##
+{ 'struct': 'SaveVMInfo',
+  'data': {'*status': 'str', '*error': 'str',
+           '*total-time': 'int', '*bytes': 'int'} }
+
+##
+# @query-savevm:
+#
+# Returns information about current savevm process.
+#
+# Returns: @SaveVMInfo
+#
+# Since: 1.3
+##
+{ 'command': 'query-savevm', 'returns': 'SaveVMInfo' }
+
 ##
 # @query-migrate:
 #
diff --git a/qapi/misc.json b/qapi/misc.json
index ed65ed27e3..4c4618a574 100644
--- a/qapi/misc.json
+++ b/qapi/misc.json
@@ -1368,6 +1368,38 @@
 ##
 { 'command': 'query-fdsets', 'returns': ['FdsetInfo'] }
 
+##
+# @savevm-start:
+#
+# Prepare for snapshot and halt VM. Save VM state to statefile.
+#
+##
+{ 'command': 'savevm-start', 'data': { '*statefile': 'str' } }
+
+##
+# @snapshot-drive:
+#
+# Create an internal drive snapshot.
+#
+##
+{ 'command': 'snapshot-drive', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @delete-drive-snapshot:
+#
+# Delete a drive snapshot.
+#
+##
+{ 'command': 'delete-drive-snapshot', 'data': { 'device': 'str', 'name': 'str' } }
+
+##
+# @savevm-end:
+#
+# Resume VM after a snapshot.
+#
+##
+{ 'command': 'savevm-end' }
+
 ##
 # @AcpiTableOptions:
 #
diff --git a/qemu-options.hx b/qemu-options.hx
index 65c9473b73..4cb2681bfc 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -3818,6 +3818,19 @@ STEXI
 Start right away with a saved state (@code{loadvm} in monitor)
 ETEXI
 
+DEF("loadstate", HAS_ARG, QEMU_OPTION_loadstate, \
+    "-loadstate file\n" \
+    "                start right away with a saved state\n",
+    QEMU_ARCH_ALL)
+STEXI
+ at item -loadstate @var{file}
+ at findex -loadstate
+Start right away with a saved state. This option does not rollback
+disk state like @code{loadvm}, so user must make sure that disk
+have correct state. @var{file} can be any valid device URL. See the section
+for "Device URL Syntax" for more information.
+ETEXI
+
 #ifndef _WIN32
 DEF("daemonize", 0, QEMU_OPTION_daemonize, \
     "-daemonize      daemonize QEMU after initializing\n", QEMU_ARCH_ALL)
diff --git a/savevm-async.c b/savevm-async.c
new file mode 100644
index 0000000000..5a20009b9a
--- /dev/null
+++ b/savevm-async.c
@@ -0,0 +1,463 @@
+#include "qemu/osdep.h"
+#include "migration/migration.h"
+#include "migration/savevm.h"
+#include "migration/snapshot.h"
+#include "migration/global_state.h"
+#include "migration/ram.h"
+#include "migration/qemu-file.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/runstate.h"
+#include "block/block.h"
+#include "sysemu/block-backend.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qapi/qmp/qdict.h"
+#include "qapi/qapi-commands-migration.h"
+#include "qapi/qapi-commands-misc.h"
+#include "qapi/qapi-commands-block.h"
+#include "qemu/cutils.h"
+#include "qemu/main-loop.h"
+#include "qemu/rcu.h"
+
+/* #define DEBUG_SAVEVM_STATE */
+
+/* used while emulated sync operation in progress */
+#define NOT_DONE -EINPROGRESS
+
+#ifdef DEBUG_SAVEVM_STATE
+#define DPRINTF(fmt, ...) \
+    do { printf("savevm-async: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+enum {
+    SAVE_STATE_DONE,
+    SAVE_STATE_ERROR,
+    SAVE_STATE_ACTIVE,
+    SAVE_STATE_COMPLETED,
+    SAVE_STATE_CANCELLED
+};
+
+
+static struct SnapshotState {
+    BlockBackend *target;
+    size_t bs_pos;
+    int state;
+    Error *error;
+    Error *blocker;
+    int saved_vm_running;
+    QEMUFile *file;
+    int64_t total_time;
+    QEMUBH *cleanup_bh;
+    QemuThread thread;
+} snap_state;
+
+SaveVMInfo *qmp_query_savevm(Error **errp)
+{
+    SaveVMInfo *info = g_malloc0(sizeof(*info));
+    struct SnapshotState *s = &snap_state;
+
+    if (s->state != SAVE_STATE_DONE) {
+        info->has_bytes = true;
+        info->bytes = s->bs_pos;
+        switch (s->state) {
+        case SAVE_STATE_ERROR:
+            info->has_status = true;
+            info->status = g_strdup("failed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            if (s->error) {
+                info->has_error = true;
+                info->error = g_strdup(error_get_pretty(s->error));
+            }
+            break;
+        case SAVE_STATE_ACTIVE:
+            info->has_status = true;
+            info->status = g_strdup("active");
+            info->has_total_time = true;
+            info->total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME)
+                - s->total_time;
+            break;
+        case SAVE_STATE_COMPLETED:
+            info->has_status = true;
+            info->status = g_strdup("completed");
+            info->has_total_time = true;
+            info->total_time = s->total_time;
+            break;
+        }
+    }
+
+    return info;
+}
+
+static int save_snapshot_cleanup(void)
+{
+    int ret = 0;
+
+    DPRINTF("save_snapshot_cleanup\n");
+
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) -
+        snap_state.total_time;
+
+    if (snap_state.file) {
+        ret = qemu_fclose(snap_state.file);
+    }
+
+    if (snap_state.target) {
+        /* try to truncate, but ignore errors (will fail on block devices).
+         * note: bdrv_read() need whole blocks, so we round up
+         */
+        size_t size = QEMU_ALIGN_UP(snap_state.bs_pos, BDRV_SECTOR_SIZE);
+        blk_truncate(snap_state.target, size, false, PREALLOC_MODE_OFF, NULL);
+        blk_op_unblock_all(snap_state.target, snap_state.blocker);
+        error_free(snap_state.blocker);
+        snap_state.blocker = NULL;
+        blk_unref(snap_state.target);
+        snap_state.target = NULL;
+    }
+
+    return ret;
+}
+
+static void save_snapshot_error(const char *fmt, ...)
+{
+    va_list ap;
+    char *msg;
+
+    va_start(ap, fmt);
+    msg = g_strdup_vprintf(fmt, ap);
+    va_end(ap);
+
+    DPRINTF("save_snapshot_error: %s\n", msg);
+
+    if (!snap_state.error) {
+        error_set(&snap_state.error, ERROR_CLASS_GENERIC_ERROR, "%s", msg);
+    }
+
+    g_free (msg);
+
+    snap_state.state = SAVE_STATE_ERROR;
+}
+
+static int block_state_close(void *opaque, Error **errp)
+{
+    snap_state.file = NULL;
+    return blk_flush(snap_state.target);
+}
+
+typedef struct BlkRwCo {
+    int64_t offset;
+    QEMUIOVector *qiov;
+    ssize_t ret;
+} BlkRwCo;
+
+static void coroutine_fn block_state_write_entry(void *opaque) {
+    BlkRwCo *rwco = opaque;
+    rwco->ret = blk_co_pwritev(snap_state.target, rwco->offset, rwco->qiov->size,
+                               rwco->qiov, 0);
+}
+
+static ssize_t block_state_writev_buffer(void *opaque, struct iovec *iov,
+                                         int iovcnt, int64_t pos, Error **errp)
+{
+    QEMUIOVector qiov;
+    BlkRwCo rwco;
+
+    assert(pos == snap_state.bs_pos);
+    rwco = (BlkRwCo) {
+        .offset = pos,
+        .qiov = &qiov,
+        .ret = NOT_DONE,
+    };
+
+    qemu_iovec_init_external(&qiov, iov, iovcnt);
+
+    if (qemu_in_coroutine()) {
+        block_state_write_entry(&rwco);
+    } else {
+        Coroutine *co = qemu_coroutine_create(&block_state_write_entry, &rwco);
+        bdrv_coroutine_enter(blk_bs(snap_state.target), co);
+        BDRV_POLL_WHILE(blk_bs(snap_state.target), rwco.ret == NOT_DONE);
+    }
+    if (rwco.ret < 0) {
+        return rwco.ret;
+    }
+
+    snap_state.bs_pos += qiov.size;
+    return qiov.size;
+}
+
+static const QEMUFileOps block_file_ops = {
+    .writev_buffer =  block_state_writev_buffer,
+    .close =          block_state_close,
+};
+
+static void process_savevm_cleanup(void *opaque)
+{
+    int ret;
+    qemu_bh_delete(snap_state.cleanup_bh);
+    snap_state.cleanup_bh = NULL;
+    qemu_mutex_unlock_iothread();
+    qemu_thread_join(&snap_state.thread);
+    qemu_mutex_lock_iothread();
+    ret = save_snapshot_cleanup();
+    if (ret < 0) {
+        save_snapshot_error("save_snapshot_cleanup error %d", ret);
+    } else if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_COMPLETED;
+    } else {
+        save_snapshot_error("process_savevm_cleanup: invalid state: %d",
+                            snap_state.state);
+    }
+    if (snap_state.saved_vm_running) {
+        vm_start();
+        snap_state.saved_vm_running = false;
+    }
+}
+
+static void *process_savevm_thread(void *opaque)
+{
+    int ret;
+    int64_t maxlen;
+
+    rcu_register_thread();
+
+    qemu_savevm_state_header(snap_state.file);
+    qemu_savevm_state_setup(snap_state.file);
+    ret = qemu_file_get_error(snap_state.file);
+
+    if (ret < 0) {
+        save_snapshot_error("qemu_savevm_state_setup failed");
+        rcu_unregister_thread();
+        return NULL;
+    }
+
+    while (snap_state.state == SAVE_STATE_ACTIVE) {
+        uint64_t pending_size, pend_precopy, pend_compatible, pend_postcopy;
+
+        qemu_savevm_state_pending(snap_state.file, 0, &pend_precopy, &pend_compatible, &pend_postcopy);
+        pending_size = pend_precopy + pend_compatible + pend_postcopy;
+
+        maxlen = blk_getlength(snap_state.target) - 30*1024*1024;
+
+        if (pending_size > 400000 && snap_state.bs_pos + pending_size < maxlen) {
+            qemu_mutex_lock_iothread();
+            ret = qemu_savevm_state_iterate(snap_state.file, false);
+            if (ret < 0) {
+                save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+                break;
+            }
+            qemu_mutex_unlock_iothread();
+            DPRINTF("savevm inerate pending size %lu ret %d\n", pending_size, ret);
+        } else {
+            qemu_mutex_lock_iothread();
+            qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL);
+            ret = global_state_store();
+            if (ret) {
+                save_snapshot_error("global_state_store error %d", ret);
+                break;
+            }
+            ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
+            if (ret < 0) {
+                save_snapshot_error("vm_stop_force_state error %d", ret);
+                break;
+            }
+            DPRINTF("savevm inerate finished\n");
+            /* upstream made the return value here inconsistent
+             * (-1 instead of 'ret' in one case and 0 after flush which can
+             * still set a file error...)
+             */
+            (void)qemu_savevm_state_complete_precopy(snap_state.file, false, false);
+            ret = qemu_file_get_error(snap_state.file);
+            if (ret < 0) {
+                    save_snapshot_error("qemu_savevm_state_iterate error %d", ret);
+                    break;
+            }
+            qemu_savevm_state_cleanup();
+            DPRINTF("save complete\n");
+            break;
+        }
+    }
+
+    qemu_bh_schedule(snap_state.cleanup_bh);
+    qemu_mutex_unlock_iothread();
+
+    rcu_unregister_thread();
+    return NULL;
+}
+
+void qmp_savevm_start(bool has_statefile, const char *statefile, Error **errp)
+{
+    Error *local_err = NULL;
+
+    int bdrv_oflags = BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_NO_FLUSH;
+
+    if (snap_state.state != SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot already started\n");
+        return;
+    }
+
+    /* initialize snapshot info */
+    snap_state.saved_vm_running = runstate_is_running();
+    snap_state.bs_pos = 0;
+    snap_state.total_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
+    snap_state.blocker = NULL;
+
+    if (snap_state.error) {
+        error_free(snap_state.error);
+        snap_state.error = NULL;
+    }
+
+    if (!has_statefile) {
+        vm_stop(RUN_STATE_SAVE_VM);
+        snap_state.state = SAVE_STATE_COMPLETED;
+        return;
+    }
+
+    if (qemu_savevm_state_blocked(errp)) {
+        return;
+    }
+
+    /* Open the image */
+    QDict *options = NULL;
+    options = qdict_new();
+    qdict_put_str(options, "driver", "raw");
+    snap_state.target = blk_new_open(statefile, NULL, options, bdrv_oflags, &local_err);
+    if (!snap_state.target) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+    snap_state.file = qemu_fopen_ops(&snap_state, &block_file_ops);
+
+    if (!snap_state.file) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "failed to open '%s'", statefile);
+        goto restart;
+    }
+
+
+    error_setg(&snap_state.blocker, "block device is in use by savevm");
+    blk_op_block_all(snap_state.target, snap_state.blocker);
+
+    snap_state.state = SAVE_STATE_ACTIVE;
+    snap_state.cleanup_bh = qemu_bh_new(process_savevm_cleanup, &snap_state);
+    qemu_thread_create(&snap_state.thread, "savevm-async", process_savevm_thread,
+                       NULL, QEMU_THREAD_JOINABLE);
+
+    return;
+
+restart:
+
+    save_snapshot_error("setup failed");
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+    }
+}
+
+void qmp_savevm_end(Error **errp)
+{
+    if (snap_state.state == SAVE_STATE_DONE) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                  "VM snapshot not started\n");
+        return;
+    }
+
+    if (snap_state.state == SAVE_STATE_ACTIVE) {
+        snap_state.state = SAVE_STATE_CANCELLED;
+        return;
+    }
+
+    if (snap_state.saved_vm_running) {
+        vm_start();
+    }
+
+    snap_state.state = SAVE_STATE_DONE;
+}
+
+// FIXME: Deprecated
+void qmp_snapshot_drive(const char *device, const char *name, Error **errp)
+{
+    // Compatibility to older qemu-server.
+    qmp_blockdev_snapshot_internal_sync(device, name, errp);
+}
+
+// FIXME: Deprecated
+void qmp_delete_drive_snapshot(const char *device, const char *name,
+                               Error **errp)
+{
+    // Compatibility to older qemu-server.
+    (void)qmp_blockdev_snapshot_delete_internal_sync(device, false, NULL,
+                                                     true, name, errp);
+}
+
+static ssize_t loadstate_get_buffer(void *opaque, uint8_t *buf, int64_t pos,
+                                    size_t size, Error **errp)
+{
+    BlockBackend *be = opaque;
+    int64_t maxlen = blk_getlength(be);
+    if (pos > maxlen) {
+        return -EIO;
+    }
+    if ((pos + size) > maxlen) {
+        size = maxlen - pos - 1;
+    }
+    if (size == 0) {
+        return 0;
+    }
+    return blk_pread(be, pos, buf, size);
+}
+
+static const QEMUFileOps loadstate_file_ops = {
+    .get_buffer = loadstate_get_buffer,
+};
+
+int load_snapshot_from_blockdev(const char *filename, Error **errp)
+{
+    BlockBackend *be;
+    Error *local_err = NULL;
+    Error *blocker = NULL;
+
+    QEMUFile *f;
+    int ret = -EINVAL;
+
+    be = blk_new_open(filename, NULL, NULL, 0, &local_err);
+
+    if (!be) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    error_setg(&blocker, "block device is in use by load state");
+    blk_op_block_all(be, blocker);
+
+    /* restore the VM state */
+    f = qemu_fopen_ops(be, &loadstate_file_ops);
+    if (!f) {
+        error_setg(errp, "Could not open VM state file");
+        goto the_end;
+    }
+
+    qemu_system_reset(SHUTDOWN_CAUSE_NONE);
+    ret = qemu_loadvm_state(f);
+
+    qemu_fclose(f);
+    migration_incoming_state_destroy();
+    if (ret < 0) {
+        error_setg_errno(errp, -ret, "Error while loading VM state");
+        goto the_end;
+    }
+
+    ret = 0;
+
+ the_end:
+    if (be) {
+        blk_op_unblock_all(be, blocker);
+        error_free(blocker);
+        blk_unref(be);
+    }
+    return ret;
+}
diff --git a/vl.c b/vl.c
index 6a65a64bfd..1616f55a38 100644
--- a/vl.c
+++ b/vl.c
@@ -2840,6 +2840,7 @@ int main(int argc, char **argv, char **envp)
     int optind;
     const char *optarg;
     const char *loadvm = NULL;
+    const char *loadstate = NULL;
     MachineClass *machine_class;
     const char *cpu_option;
     const char *vga_model = NULL;
@@ -3430,6 +3431,9 @@ int main(int argc, char **argv, char **envp)
             case QEMU_OPTION_loadvm:
                 loadvm = optarg;
                 break;
+            case QEMU_OPTION_loadstate:
+                loadstate = optarg;
+                break;
             case QEMU_OPTION_full_screen:
                 dpy.has_full_screen = true;
                 dpy.full_screen = true;
@@ -4442,6 +4446,12 @@ int main(int argc, char **argv, char **envp)
             autostart = 0;
             exit(1);
         }
+    } else if (loadstate) {
+        Error *local_err = NULL;
+        if (load_snapshot_from_blockdev(loadstate, &local_err) < 0) {
+            error_report_err(local_err);
+            autostart = 0;
+        }
     }
     if (replay_mode != REPLAY_MODE_NONE) {
         replay_vmstate_init();
-- 
2.20.1




More information about the pve-devel mailing list