[pve-devel] [PATCH zfsonlinux 1/2] update zfs submodule to 2.2.2 and refresh patches

Stoiko Ivanov s.ivanov at proxmox.com
Mon Dec 4 12:27:51 CET 2023


The removed patches were cherry-picks that are already included in 2.2.2.

Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
---
 ...-move-manpage-arcstat-1-to-arcstat-8.patch |   2 +-
 ...-guard-access-to-l2arc-MFU-MRU-stats.patch |   2 +-
 ...uncate_shares-without-etc-exports.d.patch} |   0
 ...ol-Remove-broken-blk-mq-optimization.patch |  99 ---------
 ...evert-zvol-Temporally-disable-blk-mq.patch | 123 -----------
 ...ten-bounds-for-noalloc-stat-availab.patch} |   0
 ...und-UBSAN-errors-for-variable-arrays.patch |  72 -------
 ...g-between-unencrypted-and-encrypted-.patch |  44 ----
 ...Add-a-tunable-to-disable-BRT-support.patch | 201 ------------------
 ...2.1-Disable-block-cloning-by-default.patch |  42 ----
 ...heck-dnode-and-its-data-for-dirtines.patch |  97 ---------
 debian/patches/series                         |  11 +-
 upstream                                      |   2 +-
 13 files changed, 5 insertions(+), 690 deletions(-)
 rename debian/patches/{0012-Fix-nfs_truncate_shares-without-etc-exports.d.patch => 0010-Fix-nfs_truncate_shares-without-etc-exports.d.patch} (100%)
 delete mode 100644 debian/patches/0010-zvol-Remove-broken-blk-mq-optimization.patch
 delete mode 100644 debian/patches/0011-Revert-zvol-Temporally-disable-blk-mq.patch
 rename debian/patches/{0014-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch => 0011-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch} (100%)
 delete mode 100644 debian/patches/0013-Workaround-UBSAN-errors-for-variable-arrays.patch
 delete mode 100644 debian/patches/0015-Fix-block-cloning-between-unencrypted-and-encrypted-.patch
 delete mode 100644 debian/patches/0016-Add-a-tunable-to-disable-BRT-support.patch
 delete mode 100644 debian/patches/0017-zfs-2.2.1-Disable-block-cloning-by-default.patch
 delete mode 100644 debian/patches/0018-dnode_is_dirty-check-dnode-and-its-data-for-dirtines.patch

diff --git a/debian/patches/0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch b/debian/patches/0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
index b21a301f..c11c1ae8 100644
--- a/debian/patches/0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
+++ b/debian/patches/0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
@@ -15,7 +15,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
  rename man/{man1/arcstat.1 => man8/arcstat.8} (99%)
 
 diff --git a/man/Makefile.am b/man/Makefile.am
-index 36c1aede1..94fd96e58 100644
+index 45156571e..3713e9371 100644
 --- a/man/Makefile.am
 +++ b/man/Makefile.am
 @@ -2,7 +2,6 @@ dist_noinst_man_MANS = \
diff --git a/debian/patches/0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch b/debian/patches/0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
index fde0529a..f8cb3539 100644
--- a/debian/patches/0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
+++ b/debian/patches/0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
@@ -27,7 +27,7 @@ Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
  2 files changed, 21 insertions(+), 21 deletions(-)
 
 diff --git a/cmd/arc_summary b/cmd/arc_summary
-index 426e02070..9de198150 100755
+index 9c69ec4f8..edf94ea2a 100755
 --- a/cmd/arc_summary
 +++ b/cmd/arc_summary
 @@ -655,13 +655,13 @@ def section_arc(kstats_dict):
diff --git a/debian/patches/0012-Fix-nfs_truncate_shares-without-etc-exports.d.patch b/debian/patches/0010-Fix-nfs_truncate_shares-without-etc-exports.d.patch
similarity index 100%
rename from debian/patches/0012-Fix-nfs_truncate_shares-without-etc-exports.d.patch
rename to debian/patches/0010-Fix-nfs_truncate_shares-without-etc-exports.d.patch
diff --git a/debian/patches/0010-zvol-Remove-broken-blk-mq-optimization.patch b/debian/patches/0010-zvol-Remove-broken-blk-mq-optimization.patch
deleted file mode 100644
index 178e68ee..00000000
--- a/debian/patches/0010-zvol-Remove-broken-blk-mq-optimization.patch
+++ /dev/null
@@ -1,99 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tony Hutter <hutter2 at llnl.gov>
-Date: Mon, 23 Oct 2023 14:45:06 -0700
-Subject: [PATCH] zvol: Remove broken blk-mq optimization
-
-This fix removes a dubious optimization in zfs_uiomove_bvec_rq()
-that saved the iterator contents of a rq_for_each_segment().  This
-optimization allowed restoring the "saved state" from a previous
-rq_for_each_segment() call on the same uio so that you wouldn't
-need to iterate though each bvec on every zfs_uiomove_bvec_rq() call.
-However, if the kernel is manipulating the requests/bios/bvecs under
-the covers between zfs_uiomove_bvec_rq() calls, then it could result
-in corruption from using the "saved state".  This optimization
-results in an unbootable system after installing an OS on a zvol
-with blk-mq enabled.
-
-Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
-Signed-off-by: Tony Hutter <hutter2 at llnl.gov>
-Closes #15351
-(cherry picked from commit 7c9b6fed16ed5034fd1cdfdaedfad93dc97b1557)
-Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
----
- include/os/linux/spl/sys/uio.h |  8 --------
- module/os/linux/zfs/zfs_uio.c  | 29 -----------------------------
- 2 files changed, 37 deletions(-)
-
-diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h
-index cce097e16..a4b600004 100644
---- a/include/os/linux/spl/sys/uio.h
-+++ b/include/os/linux/spl/sys/uio.h
-@@ -73,13 +73,6 @@ typedef struct zfs_uio {
- 	size_t		uio_skip;
- 
- 	struct request	*rq;
--
--	/*
--	 * Used for saving rq_for_each_segment() state between calls
--	 * to zfs_uiomove_bvec_rq().
--	 */
--	struct req_iterator iter;
--	struct bio_vec bv;
- } zfs_uio_t;
- 
- 
-@@ -138,7 +131,6 @@ zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
- 	} else {
- 		uio->uio_bvec = NULL;
- 		uio->uio_iovcnt = 0;
--		memset(&uio->iter, 0, sizeof (uio->iter));
- 	}
- 
- 	uio->uio_loffset = io_offset(bio, rq);
-diff --git a/module/os/linux/zfs/zfs_uio.c b/module/os/linux/zfs/zfs_uio.c
-index 3efd4ab15..c2ed67c43 100644
---- a/module/os/linux/zfs/zfs_uio.c
-+++ b/module/os/linux/zfs/zfs_uio.c
-@@ -204,22 +204,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
- 	this_seg_start = orig_loffset;
- 
- 	rq_for_each_segment(bv, rq, iter) {
--		if (uio->iter.bio) {
--			/*
--			 * If uio->iter.bio is present, then we know we've saved
--			 * uio->iter from a previous call to this function, and
--			 * we can skip ahead in this rq_for_each_segment() loop
--			 * to where we last left off.  That way, we don't need
--			 * to iterate over tons of segments we've already
--			 * processed - we can just restore the "saved state".
--			 */
--			iter = uio->iter;
--			bv = uio->bv;
--			this_seg_start = uio->uio_loffset;
--			memset(&uio->iter, 0, sizeof (uio->iter));
--			continue;
--		}
--
- 		/*
- 		 * Lookup what the logical offset of the last byte of this
- 		 * segment is.
-@@ -260,19 +244,6 @@ zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
- 			copied = 1;	/* We copied some data */
- 		}
- 
--		if (n == 0) {
--			/*
--			 * All done copying.  Save our 'iter' value to the uio.
--			 * This allows us to "save our state" and skip ahead in
--			 * the rq_for_each_segment() loop the next time we call
--			 * call zfs_uiomove_bvec_rq() on this uio (which we
--			 * will be doing for any remaining data in the uio).
--			 */
--			uio->iter = iter; /* make a copy of the struct data */
--			uio->bv = bv;
--			return (0);
--		}
--
- 		this_seg_start = this_seg_end + 1;
- 	}
- 
diff --git a/debian/patches/0011-Revert-zvol-Temporally-disable-blk-mq.patch b/debian/patches/0011-Revert-zvol-Temporally-disable-blk-mq.patch
deleted file mode 100644
index 2671cda6..00000000
--- a/debian/patches/0011-Revert-zvol-Temporally-disable-blk-mq.patch
+++ /dev/null
@@ -1,123 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tony Hutter <hutter2 at llnl.gov>
-Date: Mon, 23 Oct 2023 14:39:59 -0700
-Subject: [PATCH] Revert "zvol: Temporally disable blk-mq"
-
-This reverts commit aefb6a2bd6c24597cde655e9ce69edd0a4c34357.
-
-aefb6a2bd temporally disabled blk-mq until we could fix a fix for
-
-Signed-off-by: Tony Hutter <hutter2 at llnl.gov>
-Closes #15439
-(cherry picked from commit 05c4710e8958832afc2868102c9535a4f18115be)
-Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
----
- man/man4/zfs.4                       | 57 ++++++++++++++++++++++++++++
- module/os/linux/zfs/zvol_os.c        | 12 ++++++
- tests/zfs-tests/include/tunables.cfg |  2 +-
- 3 files changed, 70 insertions(+), 1 deletion(-)
-
-diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
-index 71a3e67ee..cfadd79d8 100644
---- a/man/man4/zfs.4
-+++ b/man/man4/zfs.4
-@@ -2317,6 +2317,63 @@ If
- .Sy zvol_threads
- to the number of CPUs present or 32 (whichever is greater).
- .
-+.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
-+The number of threads per zvol to use for queuing IO requests.
-+This parameter will only appear if your kernel supports
-+.Li blk-mq
-+and is only read and assigned to a zvol at zvol load time.
-+If
-+.Sy 0
-+(the default) then internally set
-+.Sy zvol_blk_mq_threads
-+to the number of CPUs present.
-+.
-+.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
-+Set to
-+.Sy 1
-+to use the
-+.Li blk-mq
-+API for zvols.
-+Set to
-+.Sy 0
-+(the default) to use the legacy zvol APIs.
-+This setting can give better or worse zvol performance depending on
-+the workload.
-+This parameter will only appear if your kernel supports
-+.Li blk-mq
-+and is only read and assigned to a zvol at zvol load time.
-+.
-+.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
-+If
-+.Sy zvol_use_blk_mq
-+is enabled, then process this number of
-+.Sy volblocksize Ns -sized blocks per zvol thread.
-+This tunable can be use to favor better performance for zvol reads (lower
-+values) or writes (higher values).
-+If set to
-+.Sy 0 ,
-+then the zvol layer will process the maximum number of blocks
-+per thread that it can.
-+This parameter will only appear if your kernel supports
-+.Li blk-mq
-+and is only applied at each zvol's load time.
-+.
-+.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
-+The queue_depth value for the zvol
-+.Li blk-mq
-+interface.
-+This parameter will only appear if your kernel supports
-+.Li blk-mq
-+and is only applied at each zvol's load time.
-+If
-+.Sy 0
-+(the default) then use the kernel's default queue depth.
-+Values are clamped to the kernel's
-+.Dv BLKDEV_MIN_RQ
-+and
-+.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
-+limits.
-+.
- .It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
- Defines zvol block devices behaviour when
- .Sy volmode Ns = Ns Sy default :
-diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c
-index 76521c959..7a95b54bd 100644
---- a/module/os/linux/zfs/zvol_os.c
-+++ b/module/os/linux/zfs/zvol_os.c
-@@ -1620,6 +1620,18 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
- module_param(zvol_volmode, uint, 0644);
- MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
- 
-+#ifdef HAVE_BLK_MQ
-+module_param(zvol_blk_mq_queue_depth, uint, 0644);
-+MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
-+
-+module_param(zvol_use_blk_mq, uint, 0644);
-+MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
-+
-+module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
-+MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
-+    "Process volblocksize blocks per thread");
-+#endif
-+
- #ifndef HAVE_BLKDEV_GET_ERESTARTSYS
- module_param(zvol_open_timeout_ms, uint, 0644);
- MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
-diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
-index 8010a9451..80e7bcb3b 100644
---- a/tests/zfs-tests/include/tunables.cfg
-+++ b/tests/zfs-tests/include/tunables.cfg
-@@ -89,7 +89,7 @@ VDEV_VALIDATE_SKIP		vdev.validate_skip		vdev_validate_skip
- VOL_INHIBIT_DEV			UNSUPPORTED			zvol_inhibit_dev
- VOL_MODE			vol.mode			zvol_volmode
- VOL_RECURSIVE			vol.recursive			UNSUPPORTED
--VOL_USE_BLK_MQ			UNSUPPORTED			UNSUPPORTED
-+VOL_USE_BLK_MQ			UNSUPPORTED			zvol_use_blk_mq
- XATTR_COMPAT			xattr_compat			zfs_xattr_compat
- ZEVENT_LEN_MAX			zevent.len_max			zfs_zevent_len_max
- ZEVENT_RETAIN_MAX		zevent.retain_max		zfs_zevent_retain_max
diff --git a/debian/patches/0014-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch b/debian/patches/0011-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
similarity index 100%
rename from debian/patches/0014-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
rename to debian/patches/0011-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
diff --git a/debian/patches/0013-Workaround-UBSAN-errors-for-variable-arrays.patch b/debian/patches/0013-Workaround-UBSAN-errors-for-variable-arrays.patch
deleted file mode 100644
index 0b98c42a..00000000
--- a/debian/patches/0013-Workaround-UBSAN-errors-for-variable-arrays.patch
+++ /dev/null
@@ -1,72 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tony Hutter <hutter2 at llnl.gov>
-Date: Thu, 9 Nov 2023 16:43:35 -0800
-Subject: [PATCH] Workaround UBSAN errors for variable arrays
-
-This gets around UBSAN errors when using arrays at the end of
-structs.  It converts some zero-length arrays to variable length
-arrays and disables UBSAN checking on certain modules.
-
-It is based off of the patch from #15460.
-
-Addresses: #15145
-Signed-off-by: Tony Hutter <hutter2 at llnl.gov>
-Co-authored-by: Tony Hutter <hutter2 at llnl.gov>
-Co-authored-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht at proxmox.com>
----
- include/os/linux/spl/sys/kmem_cache.h | 2 +-
- include/sys/vdev_raidz_impl.h         | 4 ++--
- module/Kbuild.in                      | 4 ++++
- 3 files changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/include/os/linux/spl/sys/kmem_cache.h b/include/os/linux/spl/sys/kmem_cache.h
-index 20eeadc46..82d50b603 100644
---- a/include/os/linux/spl/sys/kmem_cache.h
-+++ b/include/os/linux/spl/sys/kmem_cache.h
-@@ -108,7 +108,7 @@ typedef struct spl_kmem_magazine {
- 	uint32_t		skm_refill;	/* Batch refill size */
- 	struct spl_kmem_cache	*skm_cache;	/* Owned by cache */
- 	unsigned int		skm_cpu;	/* Owned by cpu */
--	void			*skm_objs[0];	/* Object pointers */
-+	void			*skm_objs[];	/* Object pointers */
- } spl_kmem_magazine_t;
- 
- typedef struct spl_kmem_obj {
-diff --git a/include/sys/vdev_raidz_impl.h b/include/sys/vdev_raidz_impl.h
-index c1037fa12..73c26dff1 100644
---- a/include/sys/vdev_raidz_impl.h
-+++ b/include/sys/vdev_raidz_impl.h
-@@ -130,7 +130,7 @@ typedef struct raidz_row {
- 	uint64_t rr_offset;		/* Logical offset for *_io_verify() */
- 	uint64_t rr_size;		/* Physical size for *_io_verify() */
- #endif
--	raidz_col_t rr_col[0];		/* Flexible array of I/O columns */
-+	raidz_col_t rr_col[];		/* Flexible array of I/O columns */
- } raidz_row_t;
- 
- typedef struct raidz_map {
-@@ -139,7 +139,7 @@ typedef struct raidz_map {
- 	int rm_nskip;			/* RAIDZ sectors skipped for padding */
- 	int rm_skipstart;		/* Column index of padding start */
- 	const raidz_impl_ops_t *rm_ops;	/* RAIDZ math operations */
--	raidz_row_t *rm_row[0];		/* flexible array of rows */
-+	raidz_row_t *rm_row[];		/* flexible array of rows */
- } raidz_map_t;
- 
- 
-diff --git a/module/Kbuild.in b/module/Kbuild.in
-index c13217159..b9c284a24 100644
---- a/module/Kbuild.in
-+++ b/module/Kbuild.in
-@@ -488,6 +488,10 @@ zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
- zfs-$(CONFIG_PPC)   += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
- zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
- 
-+UBSAN_SANITIZE_zap_leaf.o := n
-+UBSAN_SANITIZE_zap_micro.o := n
-+UBSAN_SANITIZE_sa.o := n
-+
- # Suppress incorrect warnings from versions of objtool which are not
- # aware of x86 EVEX prefix instructions used for AVX512.
- OBJECT_FILES_NON_STANDARD_vdev_raidz_math_avx512bw.o := y
diff --git a/debian/patches/0015-Fix-block-cloning-between-unencrypted-and-encrypted-.patch b/debian/patches/0015-Fix-block-cloning-between-unencrypted-and-encrypted-.patch
deleted file mode 100644
index c2fc506e..00000000
--- a/debian/patches/0015-Fix-block-cloning-between-unencrypted-and-encrypted-.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Martin=20Matu=C5=A1ka?= <mm at FreeBSD.org>
-Date: Tue, 31 Oct 2023 21:49:41 +0100
-Subject: [PATCH] Fix block cloning between unencrypted and encrypted datasets
-
-Block cloning from an encrypted dataset into an unencrypted dataset
-and vice versa is not possible. The current code did allow cloning
-unencrypted files into an encrypted dataset causing a panic when
-these were accessed. Block cloning between encrypted and encrypted
-is currently supported on the same filesystem only.
-
-Reviewed-by: Alexander Motin <mav at FreeBSD.org>
-Reviewed-by: Kay Pedersen <mail at mkwg.de>
-Reviewed-by: Rob N <robn at despairlabs.com>
-Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
-Signed-off-by: Martin Matuska <mm at FreeBSD.org>
-Closes #15464
-Closes #15465
-(cherry picked from commit 459c99ff2339a4a514abcf2255f9b3e5324ef09e)
-Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
----
- module/zfs/zfs_vnops.c | 9 +++++++++
- 1 file changed, 9 insertions(+)
-
-diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
-index 40d6c87a7..84e6b10ef 100644
---- a/module/zfs/zfs_vnops.c
-+++ b/module/zfs/zfs_vnops.c
-@@ -1094,6 +1094,15 @@ zfs_clone_range(znode_t *inzp, uint64_t *inoffp, znode_t *outzp,
- 
- 	ASSERT(!outzfsvfs->z_replay);
- 
-+	/*
-+	 * Block cloning from an unencrypted dataset into an encrypted
-+	 * dataset and vice versa is not supported.
-+	 */
-+	if (inos->os_encrypted != outos->os_encrypted) {
-+		zfs_exit_two(inzfsvfs, outzfsvfs, FTAG);
-+		return (SET_ERROR(EXDEV));
-+	}
-+
- 	error = zfs_verify_zp(inzp);
- 	if (error == 0)
- 		error = zfs_verify_zp(outzp);
diff --git a/debian/patches/0016-Add-a-tunable-to-disable-BRT-support.patch b/debian/patches/0016-Add-a-tunable-to-disable-BRT-support.patch
deleted file mode 100644
index 53977479..00000000
--- a/debian/patches/0016-Add-a-tunable-to-disable-BRT-support.patch
+++ /dev/null
@@ -1,201 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Rich Ercolani <214141+rincebrain at users.noreply.github.com>
-Date: Thu, 16 Nov 2023 14:35:22 -0500
-Subject: [PATCH] Add a tunable to disable BRT support.
-
-Copy the disable parameter that FreeBSD implemented, and extend it to
-work on Linux as well, until we're sure this is stable.
-
-Reviewed-by: Alexander Motin <mav at FreeBSD.org>
-Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
-Signed-off-by: Rich Ercolani <rincebrain at gmail.com>
-Closes #15529
-(cherry picked from commit 87e9e828655c250ce064874ff5df16f870c0a52e)
-Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
----
- include/os/freebsd/zfs/sys/zfs_vfsops_os.h        |  1 +
- include/os/linux/zfs/sys/zfs_vfsops_os.h          |  2 ++
- man/man4/zfs.4                                    |  5 +++++
- module/os/freebsd/zfs/zfs_vfsops.c                |  4 ++++
- module/os/freebsd/zfs/zfs_vnops_os.c              |  5 +++++
- module/os/linux/zfs/zfs_vnops_os.c                |  4 ++++
- module/os/linux/zfs/zpl_file_range.c              |  5 +++++
- tests/zfs-tests/include/libtest.shlib             | 15 +++++++++++++++
- tests/zfs-tests/include/tunables.cfg              |  1 +
- .../tests/functional/block_cloning/cleanup.ksh    |  4 ++++
- .../tests/functional/block_cloning/setup.ksh      |  5 +++++
- 11 files changed, 51 insertions(+)
-
-diff --git a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
-index 24bb03575..56a0ac96a 100644
---- a/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
-+++ b/include/os/freebsd/zfs/sys/zfs_vfsops_os.h
-@@ -286,6 +286,7 @@ typedef struct zfid_long {
- 
- extern uint_t zfs_fsyncer_key;
- extern int zfs_super_owner;
-+extern int zfs_bclone_enabled;
- 
- extern void zfs_init(void);
- extern void zfs_fini(void);
-diff --git a/include/os/linux/zfs/sys/zfs_vfsops_os.h b/include/os/linux/zfs/sys/zfs_vfsops_os.h
-index b4d5db21f..220466550 100644
---- a/include/os/linux/zfs/sys/zfs_vfsops_os.h
-+++ b/include/os/linux/zfs/sys/zfs_vfsops_os.h
-@@ -45,6 +45,8 @@ extern "C" {
- typedef struct zfsvfs zfsvfs_t;
- struct znode;
- 
-+extern int zfs_bclone_enabled;
-+
- /*
-  * This structure emulates the vfs_t from other platforms.  It's purpose
-  * is to facilitate the handling of mount options and minimize structural
-diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
-index cfadd79d8..32f1765a5 100644
---- a/man/man4/zfs.4
-+++ b/man/man4/zfs.4
-@@ -1137,6 +1137,11 @@ Selecting any option other than
- results in vector instructions
- from the respective CPU instruction set being used.
- .
-+.It Sy zfs_bclone_enabled Ns = Ns Sy 1 Ns | Ns 0 Pq int
-+Enable the experimental block cloning feature.
-+If this setting is 0, then even if feature at block_cloning is enabled,
-+attempts to clone blocks will act as though the feature is disabled.
-+.
- .It Sy zfs_blake3_impl Ns = Ns Sy fastest Pq string
- Select a BLAKE3 implementation.
- .Pp
-diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
-index e8b9ada13..09e18de81 100644
---- a/module/os/freebsd/zfs/zfs_vfsops.c
-+++ b/module/os/freebsd/zfs/zfs_vfsops.c
-@@ -89,6 +89,10 @@ int zfs_debug_level;
- SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
- 	"Debug level");
- 
-+int zfs_bclone_enabled = 1;
-+SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
-+	&zfs_bclone_enabled, 0, "Enable block cloning");
-+
- struct zfs_jailparam {
- 	int mount_snapshot;
- };
-diff --git a/module/os/freebsd/zfs/zfs_vnops_os.c b/module/os/freebsd/zfs/zfs_vnops_os.c
-index c498a1328..f672deed3 100644
---- a/module/os/freebsd/zfs/zfs_vnops_os.c
-+++ b/module/os/freebsd/zfs/zfs_vnops_os.c
-@@ -6243,6 +6243,11 @@ zfs_freebsd_copy_file_range(struct vop_copy_file_range_args *ap)
- 	int error;
- 	uint64_t len = *ap->a_lenp;
- 
-+	if (!zfs_bclone_enabled) {
-+		mp = NULL;
-+		goto bad_write_fallback;
-+	}
-+
- 	/*
- 	 * TODO: If offset/length is not aligned to recordsize, use
- 	 * vn_generic_copy_file_range() on this fragment.
-diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
-index 33baac9db..76fac3a02 100644
---- a/module/os/linux/zfs/zfs_vnops_os.c
-+++ b/module/os/linux/zfs/zfs_vnops_os.c
-@@ -4229,4 +4229,8 @@ EXPORT_SYMBOL(zfs_map);
- module_param(zfs_delete_blocks, ulong, 0644);
- MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
- 
-+/* CSTYLED */
-+module_param(zfs_bclone_enabled, uint, 0644);
-+MODULE_PARM_DESC(zfs_bclone_enabled, "Enable block cloning");
-+
- #endif
-diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
-index c47fe99da..73476ff40 100644
---- a/module/os/linux/zfs/zpl_file_range.c
-+++ b/module/os/linux/zfs/zpl_file_range.c
-@@ -31,6 +31,8 @@
- #include <sys/zfs_vnops.h>
- #include <sys/zfeature.h>
- 
-+int zfs_bclone_enabled = 1;
-+
- /*
-  * Clone part of a file via block cloning.
-  *
-@@ -50,6 +52,9 @@ __zpl_clone_file_range(struct file *src_file, loff_t src_off,
- 	fstrans_cookie_t cookie;
- 	int err;
- 
-+	if (!zfs_bclone_enabled)
-+		return (-EOPNOTSUPP);
-+
- 	if (!spa_feature_is_enabled(
- 	    dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
- 		return (-EOPNOTSUPP);
-diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
-index 844caa17d..d5d7bb6c8 100644
---- a/tests/zfs-tests/include/libtest.shlib
-+++ b/tests/zfs-tests/include/libtest.shlib
-@@ -3334,6 +3334,21 @@ function set_tunable_impl
- 	esac
- }
- 
-+function save_tunable
-+{
-+	[[ ! -d $TEST_BASE_DIR ]] && return 1
-+	[[ -e $TEST_BASE_DIR/tunable-$1 ]] && return 2
-+	echo "$(get_tunable """$1""")" > "$TEST_BASE_DIR"/tunable-"$1"
-+}
-+
-+function restore_tunable
-+{
-+	[[ ! -e $TEST_BASE_DIR/tunable-$1 ]] && return 1
-+	val="$(cat $TEST_BASE_DIR/tunable-"""$1""")"
-+	set_tunable64 "$1" "$val"
-+	rm $TEST_BASE_DIR/tunable-$1
-+}
-+
- #
- # Get a global system tunable
- #
-diff --git a/tests/zfs-tests/include/tunables.cfg b/tests/zfs-tests/include/tunables.cfg
-index 80e7bcb3b..a0edad14d 100644
---- a/tests/zfs-tests/include/tunables.cfg
-+++ b/tests/zfs-tests/include/tunables.cfg
-@@ -90,6 +90,7 @@ VOL_INHIBIT_DEV			UNSUPPORTED			zvol_inhibit_dev
- VOL_MODE			vol.mode			zvol_volmode
- VOL_RECURSIVE			vol.recursive			UNSUPPORTED
- VOL_USE_BLK_MQ			UNSUPPORTED			zvol_use_blk_mq
-+BCLONE_ENABLED			zfs_bclone_enabled		zfs_bclone_enabled
- XATTR_COMPAT			xattr_compat			zfs_xattr_compat
- ZEVENT_LEN_MAX			zevent.len_max			zfs_zevent_len_max
- ZEVENT_RETAIN_MAX		zevent.retain_max		zfs_zevent_retain_max
-diff --git a/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh b/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
-index 7ac13adb6..b985445a5 100755
---- a/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
-+++ b/tests/zfs-tests/tests/functional/block_cloning/cleanup.ksh
-@@ -31,4 +31,8 @@ verify_runnable "global"
- 
- default_cleanup_noexit
- 
-+if tunable_exists BCLONE_ENABLED ; then
-+	log_must restore_tunable BCLONE_ENABLED
-+fi
-+
- log_pass
-diff --git a/tests/zfs-tests/tests/functional/block_cloning/setup.ksh b/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
-index 512f5a064..58441bf8f 100755
---- a/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
-+++ b/tests/zfs-tests/tests/functional/block_cloning/setup.ksh
-@@ -33,4 +33,9 @@ fi
- 
- verify_runnable "global"
- 
-+if tunable_exists BCLONE_ENABLED ; then
-+    log_must save_tunable BCLONE_ENABLED
-+    log_must set_tunable32 BCLONE_ENABLED 1
-+fi
-+
- log_pass
diff --git a/debian/patches/0017-zfs-2.2.1-Disable-block-cloning-by-default.patch b/debian/patches/0017-zfs-2.2.1-Disable-block-cloning-by-default.patch
deleted file mode 100644
index 53a088da..00000000
--- a/debian/patches/0017-zfs-2.2.1-Disable-block-cloning-by-default.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Tony Hutter <hutter2 at llnl.gov>
-Date: Thu, 16 Nov 2023 11:42:19 -0800
-Subject: [PATCH] zfs-2.2.1: Disable block cloning by default
-
-Disable block cloning by default to mitigate possible data corruption
-(see #15529 and #15526).
-
-Signed-off-by: Tony Hutter <hutter2 at llnl.gov>
-(cherry picked from commit 479dca51c66a731e637bd2d4f9bba01a05f9ac9f)
-Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
----
- module/os/freebsd/zfs/zfs_vfsops.c   | 2 +-
- module/os/linux/zfs/zpl_file_range.c | 2 +-
- 2 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/module/os/freebsd/zfs/zfs_vfsops.c b/module/os/freebsd/zfs/zfs_vfsops.c
-index 09e18de81..0ac670ed9 100644
---- a/module/os/freebsd/zfs/zfs_vfsops.c
-+++ b/module/os/freebsd/zfs/zfs_vfsops.c
-@@ -89,7 +89,7 @@ int zfs_debug_level;
- SYSCTL_INT(_vfs_zfs, OID_AUTO, debug, CTLFLAG_RWTUN, &zfs_debug_level, 0,
- 	"Debug level");
- 
--int zfs_bclone_enabled = 1;
-+int zfs_bclone_enabled = 0;
- SYSCTL_INT(_vfs_zfs, OID_AUTO, bclone_enabled, CTLFLAG_RWTUN,
- 	&zfs_bclone_enabled, 0, "Enable block cloning");
- 
-diff --git a/module/os/linux/zfs/zpl_file_range.c b/module/os/linux/zfs/zpl_file_range.c
-index 73476ff40..139c51cf4 100644
---- a/module/os/linux/zfs/zpl_file_range.c
-+++ b/module/os/linux/zfs/zpl_file_range.c
-@@ -31,7 +31,7 @@
- #include <sys/zfs_vnops.h>
- #include <sys/zfeature.h>
- 
--int zfs_bclone_enabled = 1;
-+int zfs_bclone_enabled = 0;
- 
- /*
-  * Clone part of a file via block cloning.
diff --git a/debian/patches/0018-dnode_is_dirty-check-dnode-and-its-data-for-dirtines.patch b/debian/patches/0018-dnode_is_dirty-check-dnode-and-its-data-for-dirtines.patch
deleted file mode 100644
index f79b09bd..00000000
--- a/debian/patches/0018-dnode_is_dirty-check-dnode-and-its-data-for-dirtines.patch
+++ /dev/null
@@ -1,97 +0,0 @@
-From 9b9b09f452a469458451c221debfbab944e7f081 Mon Sep 17 00:00:00 2001
-From: Rob N <robn at despairlabs.com>
-Date: Wed, 29 Nov 2023 04:15:48 +1100
-Subject: [PATCH] dnode_is_dirty: check dnode and its data for dirtiness
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Over its history this the dirty dnode test has been changed between
-checking for a dnodes being on `os_dirty_dnodes` (`dn_dirty_link`) and
-`dn_dirty_record`.
-
-  de198f2d9 Fix lseek(SEEK_DATA/SEEK_HOLE) mmap consistency
-  2531ce372 Revert "Report holes when there are only metadata changes"
-  ec4f9b8f3 Report holes when there are only metadata changes
-  454365bba Fix dirty check in dmu_offset_next()
-  66aca2473 SEEK_HOLE should not block on txg_wait_synced()
-
-Also illumos/illumos-gate at c543ec060d illumos/illumos-gate at 2bcf0248e9
-
-It turns out both are actually required.
-
-In the case of appending data to a newly created file, the dnode proper
-is dirtied (at least to change the blocksize) and dirty records are
-added.  Thus, a single logical operation is represented by separate
-dirty indicators, and must not be separated.
-
-The incorrect dirty check becomes a problem when the first block of a
-file is being appended to while another process is calling lseek to skip
-holes. There is a small window where the dnode part is undirtied while
-there are still dirty records. In this case, `lseek(fd, 0, SEEK_DATA)`
-would not know that the file is dirty, and would go to
-`dnode_next_offset()`. Since the object has no data blocks yet, it
-returns `ESRCH`, indicating no data found, which results in `ENXIO`
-being returned to `lseek()`'s caller.
-
-Since coreutils 9.2, `cp` performs sparse copies by default, that is, it
-uses `SEEK_DATA` and `SEEK_HOLE` against the source file and attempts to
-replicate the holes in the target. When it hits the bug, its initial
-search for data fails, and it goes on to call `fallocate()` to create a
-hole over the entire destination file.
-
-This has come up more recently as users upgrade their systems, getting
-OpenZFS 2.2 as well as a newer coreutils. However, this problem has been
-reproduced against 2.1, as well as on FreeBSD 13 and 14.
-
-This change simply updates the dirty check to check both types of dirty.
-If there's anything dirty at all, we immediately go to the "wait for
-sync" stage, It doesn't really matter after that; both changes are on
-disk, so the dirty fields should be correct.
-
-Sponsored-by: Klara, Inc.
-Sponsored-by: Wasabi Technology, Inc.
-Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
-Reviewed-by: Alexander Motin <mav at FreeBSD.org>
-Reviewed-by: Rich Ercolani <rincebrain at gmail.com>
-Signed-off-by: Rob Norris <rob.norris at klarasystems.com>
-Closes #15571
-Closes #15526
-Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
----
- module/zfs/dnode.c | 12 ++++++++++--
- 1 file changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
-index 7cf03264d..ad9988366 100644
---- a/module/zfs/dnode.c
-+++ b/module/zfs/dnode.c
-@@ -1764,7 +1764,14 @@ dnode_try_claim(objset_t *os, uint64_t object, int slots)
- }
- 
- /*
-- * Checks if the dnode contains any uncommitted dirty records.
-+ * Checks if the dnode itself is dirty, or is carrying any uncommitted records.
-+ * It is important to check both conditions, as some operations (eg appending
-+ * to a file) can dirty both as a single logical unit, but they are not synced
-+ * out atomically, so checking one and not the other can result in an object
-+ * appearing to be clean mid-way through a commit.
-+ *
-+ * Do not change this lightly! If you get it wrong, dmu_offset_next() can
-+ * detect a hole where there is really data, leading to silent corruption.
-  */
- boolean_t
- dnode_is_dirty(dnode_t *dn)
-@@ -1772,7 +1779,8 @@ dnode_is_dirty(dnode_t *dn)
- 	mutex_enter(&dn->dn_mtx);
- 
- 	for (int i = 0; i < TXG_SIZE; i++) {
--		if (multilist_link_active(&dn->dn_dirty_link[i])) {
-+		if (multilist_link_active(&dn->dn_dirty_link[i]) ||
-+		    !list_is_empty(&dn->dn_dirty_records[i])) {
- 			mutex_exit(&dn->dn_mtx);
- 			return (B_TRUE);
- 		}
--- 
-2.39.2
-
diff --git a/debian/patches/series b/debian/patches/series
index ac820c74..35f81d13 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -7,12 +7,5 @@
 0007-Add-systemd-unit-for-importing-specific-pools.patch
 0008-Patch-move-manpage-arcstat-1-to-arcstat-8.patch
 0009-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch
-0010-zvol-Remove-broken-blk-mq-optimization.patch
-0011-Revert-zvol-Temporally-disable-blk-mq.patch
-0012-Fix-nfs_truncate_shares-without-etc-exports.d.patch
-0013-Workaround-UBSAN-errors-for-variable-arrays.patch
-0014-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
-0015-Fix-block-cloning-between-unencrypted-and-encrypted-.patch
-0016-Add-a-tunable-to-disable-BRT-support.patch
-0017-zfs-2.2.1-Disable-block-cloning-by-default.patch
-0018-dnode_is_dirty-check-dnode-and-its-data-for-dirtines.patch
+0010-Fix-nfs_truncate_shares-without-etc-exports.d.patch
+0011-zpool-status-tighten-bounds-for-noalloc-stat-availab.patch
diff --git a/upstream b/upstream
index 95785196..494aaaed 160000
--- a/upstream
+++ b/upstream
@@ -1 +1 @@
-Subproject commit 95785196f26e92d82cf4445654ba84e4a9671c57
+Subproject commit 494aaaed89cb9fe9f2da3b6c6f465a4bc9f6a7e1
-- 
2.39.2





More information about the pve-devel mailing list