[pve-devel] [RFC PATCH kvm] add the zeroinit block driver filter

Alexandre DERUMIER aderumier at odiso.com
Mon Mar 21 10:21:10 CET 2016


Hi,

Could this work with Ceph RBD storage as the target?

Currently, the rbd driver doesn't have write zeroes implemented.


----- Mail original -----
De: "Wolfgang Bumiller" <w.bumiller at proxmox.com>
À: "pve-devel" <pve-devel at pve.proxmox.com>
Envoyé: Vendredi 18 Mars 2016 12:17:39
Objet: [pve-devel] [RFC PATCH kvm] add the zeroinit block driver filter

--- 
This is a block driver filter (or hack...) to force the assumption of 
a zero-initialized storage (has_zero_init) and skip appending 
write_zero() requests. 

We can use this for clone_disk() when converting between lvm-thin and 
other storage types. 

To use this the destination of a qemu-img or 'qmp drive-mirror' 
command is prefixed with 'zeroinit:' 

debian/patches/pve/0044-block-add-zeroinit.patch | 248 +++++++++++++++++++++++ 
debian/patches/series | 1 + 
2 files changed, 249 insertions(+) 
create mode 100644 debian/patches/pve/0044-block-add-zeroinit.patch 

diff --git a/debian/patches/pve/0044-block-add-zeroinit.patch b/debian/patches/pve/0044-block-add-zeroinit.patch 
new file mode 100644 
index 0000000..03fc92a 
--- /dev/null 
+++ b/debian/patches/pve/0044-block-add-zeroinit.patch 
@@ -0,0 +1,248 @@ 
+From 8b996ca2740d40027652ea63f52bed8f9271331b Mon Sep 17 00:00:00 2001 
+From: Wolfgang Bumiller <w.bumiller at proxmox.com> 
+Date: Thu, 17 Mar 2016 11:33:37 +0100 
+Subject: [PATCH] block: add the zeroinit block driver filter 
+ 
+--- 
+ block/Makefile.objs | 1 + 
+ block/zeroinit.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 
+ 2 files changed, 217 insertions(+) 
+ create mode 100644 block/zeroinit.c 
+ 
+diff --git a/block/Makefile.objs b/block/Makefile.objs 
+index 58ef2ef..5bb55b0 100644 
+--- a/block/Makefile.objs 
++++ b/block/Makefile.objs 
+@@ -4,6 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o 
+ block-obj-y += qed-check.o 
+ block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o 
+ block-obj-y += quorum.o 
++block-obj-y += zeroinit.o 
+ block-obj-y += parallels.o blkdebug.o blkverify.o 
+ block-obj-y += block-backend.o snapshot.o qapi.o 
+ block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o 
+diff --git a/block/zeroinit.c b/block/zeroinit.c 
+new file mode 100644 
+index 0000000..04a9bd0 
+--- /dev/null 
++++ b/block/zeroinit.c 
+@@ -0,0 +1,216 @@ 
++/* 
++ * Filter to fake a zero-initialized block device. 
++ * 
++ * Copyright (c) 2016 Wolfgang Bumiller <w.bumiller at proxmox.com> 
++ * Copyright (c) 2016 Proxmox Server Solutions GmbH 
++ * 
++ * This work is licensed under the terms of the GNU GPL, version 2 or later. 
++ * See the COPYING file in the top-level directory. 
++ */ 
++ 
++#include "block/block_int.h" 
++#include "qapi/qmp/qdict.h" 
++#include "qapi/qmp/qstring.h" 
++ 
++/* Per-instance state of the zeroinit filter (stored in bs->opaque). */
++typedef struct { 
++ /* Value returned by zeroinit_has_zero_init(); read from "x-zeroinit". */
++ bool has_zero_init; 
++ /* Upper bound, in sectors, derived from the data writes seen so far;
++  * write-zeroes requests starting at or beyond it are dropped. */
++ int64_t extents; 
++} BDRVZeroinitState; 
++ 
++/*
++ * Valid zeroinit filenames look like zeroinit:path/to/image
++ *
++ * Strips the "zeroinit:" prefix from @filename and stores the remainder in
++ * @options under the "x-next" key. @errp is not written by this function.
++ */
++static void zeroinit_parse_filename(const char *filename, QDict *options, 
++ Error **errp) 
++{ 
++ QString *raw_path; 
++ 
++ /* Parse the zeroinit: prefix */ 
++ if (!strstart(filename, "zeroinit:", &filename)) { 
++ /* There was no prefix; therefore, all options have to be already 
++ present in the QDict (except for the filename) */ 
++ return; 
++ } 
++ 
++ /* filename now points just past the prefix; record it as "x-next" */
++ raw_path = qstring_from_str(filename); 
++ qdict_put(options, "x-next", raw_path); 
++} 
++ 
++/*
++ * Options absorbed by zeroinit_open():
++ *   "x-next"     - string handed to bdrv_open_child() as the child reference
++ *   "x-zeroinit" - boolean stored as BDRVZeroinitState.has_zero_init
++ */
++static QemuOptsList runtime_opts = { 
++ .name = "zeroinit", 
++ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), 
++ .desc = { 
++ { 
++ .name = "x-next", 
++ .type = QEMU_OPT_STRING, 
++ .help = "[internal use only, will be removed]", 
++ }, 
++ { 
++ .name = "x-zeroinit", 
++ .type = QEMU_OPT_BOOL, 
++ .help = "set has_initialized_zero flag", 
++ }, 
++ { /* end of list */ } 
++ }, 
++}; 
++ 
++/*
++ * Open callback of the zeroinit filter.
++ *
++ * Resets the write high-water mark, absorbs the runtime options from
++ * @options, opens the child named by "x-next" as bs->file and records the
++ * "x-zeroinit" flag (default: true).
++ *
++ * Returns 0 on success. On any failure the error is propagated to @errp,
++ * bs->file is unreferenced and -EINVAL is returned.
++ */
++static int zeroinit_open(BlockDriverState *bs, QDict *options, int flags,
++                         Error **errp)
++{
++    BDRVZeroinitState *state = bs->opaque;
++    Error *err = NULL;
++    QemuOpts *opts;
++    int ret = -EINVAL;
++
++    state->extents = 0;
++
++    opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
++    qemu_opts_absorb_qdict(opts, options, &err);
++    if (!err) {
++        /* Open the filtered child; only attempted when option parsing
++         * succeeded. */
++        bs->file = bdrv_open_child(qemu_opt_get(opts, "x-next"), options,
++                                   "next", bs, &child_file, false, &err);
++    }
++
++    if (err) {
++        error_propagate(errp, err);
++        bdrv_unref_child(bs, bs->file);
++    } else {
++        state->has_zero_init = qemu_opt_get_bool(opts, "x-zeroinit", true);
++        ret = 0;
++    }
++
++    qemu_opts_del(opts);
++    return ret;
++}
++ 
++/*
++ * Close callback. The driver state holds only plain values, so there is
++ * nothing to release here.
++ */
++static void zeroinit_close(BlockDriverState *bs)
++{
++    (void)bs->opaque;
++}
++ 
++/* Return the length reported by bdrv_getlength() for the filtered child. */
++static int64_t zeroinit_getlength(BlockDriverState *bs)
++{
++    BlockDriverState *child = bs->file->bs;
++    return bdrv_getlength(child);
++}
++ 
++/* Forward a read request unchanged to the filtered child. */
++static BlockAIOCB *zeroinit_aio_readv(BlockDriverState *bs,
++                                      int64_t sector_num, QEMUIOVector *qiov,
++                                      int nb_sectors,
++                                      BlockCompletionFunc *cb, void *opaque)
++{
++    BlockDriverState *child = bs->file->bs;
++    return bdrv_aio_readv(child, sector_num, qiov, nb_sectors, cb, opaque);
++}
++ 
++/*
++ * Write-zeroes callback.
++ *
++ * A request that starts at or beyond the recorded write extent is dropped
++ * and reported as successful (returns 0); any other request is forwarded
++ * to the filtered child and its result returned.
++ *
++ * NOTE(review): bdrv_write_zeroes() looks like the non-coroutine variant
++ * although this is a coroutine_fn - confirm whether a bdrv_co_* call is
++ * preferable here.
++ */
++static int coroutine_fn zeroinit_co_write_zeroes(BlockDriverState *bs,
++                                                 int64_t sector_num,
++                                                 int nb_sectors,
++                                                 BdrvRequestFlags flags)
++{
++    BDRVZeroinitState *state = bs->opaque;
++    if (sector_num < state->extents) {
++        return bdrv_write_zeroes(bs->file->bs, sector_num, nb_sectors, flags);
++    }
++    return 0;
++}
++ 
++/*
++ * Data-write callback.
++ *
++ * Records the end of the written range (first sector past the request) as
++ * the new extent when it exceeds the current one, then forwards the write
++ * unchanged to the filtered child.
++ *
++ * The extent is exactly sector_num + nb_sectors: zeroinit_co_write_zeroes()
++ * skips requests with sector_num >= extents, so an extra "+ 1" here would
++ * prevent a zero-write that directly follows this write from being skipped.
++ */
++static BlockAIOCB *zeroinit_aio_writev(BlockDriverState *bs,
++                                       int64_t sector_num, QEMUIOVector *qiov,
++                                       int nb_sectors,
++                                       BlockCompletionFunc *cb, void *opaque)
++{
++    BDRVZeroinitState *s = bs->opaque;
++    int64_t end = sector_num + nb_sectors;
++    if (end > s->extents) {
++        s->extents = end;
++    }
++    return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
++                           cb, opaque);
++}
++ 
++/* Forward an AIO flush request to the filtered child. */
++static BlockAIOCB *zeroinit_aio_flush(BlockDriverState *bs,
++                                      BlockCompletionFunc *cb,
++                                      void *opaque)
++{
++    BlockDriverState *child = bs->file->bs;
++    return bdrv_aio_flush(child, cb, opaque);
++}
++ 
++/* Delegate the "first non-filter" query for @candidate to the child. */
++static bool zeroinit_recurse_is_first_non_filter(BlockDriverState *bs,
++                                                 BlockDriverState *candidate)
++{
++    BlockDriverState *child = bs->file->bs;
++    return bdrv_recurse_is_first_non_filter(child, candidate);
++}
++ 
++/* Flush the filtered child and return its result. */
++static coroutine_fn int zeroinit_co_flush(BlockDriverState *bs)
++{
++    BlockDriverState *child = bs->file->bs;
++    return bdrv_co_flush(child);
++}
++ 
++/* Report the has_zero_init flag stored by zeroinit_open(). */
++static int zeroinit_has_zero_init(BlockDriverState *bs)
++{
++    return ((BDRVZeroinitState *)bs->opaque)->has_zero_init;
++}
++ 
++/* Return the block status of the filtered child for the given range. */
++static int64_t coroutine_fn zeroinit_co_get_block_status(BlockDriverState *bs,
++                                                         int64_t sector_num,
++                                                         int nb_sectors,
++                                                         int *pnum)
++{
++    BlockDriverState *child = bs->file->bs;
++    return bdrv_get_block_status(child, sector_num, nb_sectors, pnum);
++}
++ 
++/*
++ * Forward an AIO discard request unchanged to the filtered child.
++ *
++ * This is a callback-based AIO entry point like the other zeroinit_aio_*
++ * functions, so it carries no coroutine_fn annotation.
++ */
++static BlockAIOCB *zeroinit_aio_discard(BlockDriverState *bs,
++                                        int64_t sector_num, int nb_sectors,
++                                        BlockCompletionFunc *cb, void *opaque)
++{
++    return bdrv_aio_discard(bs->file->bs, sector_num, nb_sectors, cb, opaque);
++}
++ 
++/* Truncate the filtered child to @offset and return its result. */
++static int zeroinit_truncate(BlockDriverState *bs, int64_t offset)
++{
++    BlockDriverState *child = bs->file->bs;
++    return bdrv_truncate(child, offset);
++}
++ 
++/* Fill @bdi with the driver info reported by the filtered child. */
++static int zeroinit_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
++{
++    BlockDriverState *child = bs->file->bs;
++    return bdrv_get_info(child, bdi);
++}
++ 
++/*
++ * Driver table for the zeroinit filter. It is registered under both the
++ * format and the protocol name "zeroinit"; every callback below is one of
++ * the zeroinit_* functions defined in this file.
++ *
++ * NOTE(review): both .bdrv_co_flush_to_disk and .bdrv_aio_flush are set -
++ * confirm that providing both flush variants is intended.
++ */
++static BlockDriver bdrv_zeroinit = { 
++ .format_name = "zeroinit", 
++ .protocol_name = "zeroinit", 
++ .instance_size = sizeof(BDRVZeroinitState), 
++ 
++ .bdrv_parse_filename = zeroinit_parse_filename, 
++ .bdrv_file_open = zeroinit_open, 
++ .bdrv_close = zeroinit_close, 
++ .bdrv_getlength = zeroinit_getlength, 
++ .bdrv_co_flush_to_disk = zeroinit_co_flush, 
++ 
++ .bdrv_co_write_zeroes = zeroinit_co_write_zeroes, 
++ .bdrv_aio_writev = zeroinit_aio_writev, 
++ .bdrv_aio_readv = zeroinit_aio_readv, 
++ .bdrv_aio_flush = zeroinit_aio_flush, 
++ 
++ .is_filter = true, 
++ .bdrv_recurse_is_first_non_filter = zeroinit_recurse_is_first_non_filter, 
++ 
++ .bdrv_has_zero_init = zeroinit_has_zero_init, 
++ 
++ .bdrv_co_get_block_status = zeroinit_co_get_block_status, 
++ 
++ .bdrv_aio_discard = zeroinit_aio_discard, 
++ 
++ .bdrv_truncate = zeroinit_truncate, 
++ .bdrv_get_info = zeroinit_get_info, 
++}; 
++ 
++/* Register the zeroinit driver table with the block layer. */
++static void bdrv_zeroinit_init(void) 
++{ 
++ bdrv_register(&bdrv_zeroinit); 
++} 
++ 
++/* Hook bdrv_zeroinit_init() into the block-driver initialisation list. */
++block_init(bdrv_zeroinit_init); 
+-- 
+2.1.4 
+ 
diff --git a/debian/patches/series b/debian/patches/series 
index f07e028..e8f1710 100644 
--- a/debian/patches/series 
+++ b/debian/patches/series 
@@ -41,6 +41,7 @@ pve/0040-vnc-make-x509-imply-tls-again.patch 
pve/0041-PVE-VNC-authentication.patch 
pve/0042-vma-writer-don-t-bail-out-on-zero-length-files.patch 
pve/0043-vma-better-driver-guessing-for-bdrv_open.patch 
+pve/0044-block-add-zeroinit.patch 
extra/CVE-2015-8558-ehci_make_idt_processing_more_robust.patch 
extra/CVE-2015-8613-scsi-initialize-info-object.patch 
extra/CVE-2015-8701-net-rocker-off-by-one.patch 
-- 
2.1.4 


_______________________________________________ 
pve-devel mailing list 
pve-devel at pve.proxmox.com 
http://pve.proxmox.com/cgi-bin/mailman/listinfo/pve-devel 




More information about the pve-devel mailing list