[pve-devel] [PATCH zfsonlinux 2/3] update/rebase to zfs-0.7.10 with patches from ZOL
Stoiko Ivanov
s.ivanov at proxmox.com
Tue Sep 11 11:43:41 CEST 2018
Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
---
...lock-between-zfs-umount-snapentry_expire.patch} | 0
...zv_suspend_lock-in-zvol_open-zvol_release.patch | 124 +++
...nux-4.18-compat-inode-timespec-timespec64.patch | 560 +++++++++++++
...-Linux-compat-4.18-check_disk_size_change.patch | 808 +++++++++++++++++++
...7-ztest-assertion-failure-in-zil_lwb_writ.patch | 368 +++++++++
...09-Fix-divide-by-zero-in-mmp_delay_update.patch | 34 +
...-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch | 867 +++++++++++++++++++++
...011-Trim-new-line-from-zfs_vdev_scheduler.patch | 155 ++++
...param-callbacks-check-for-initialized-spa.patch | 84 ++
zfs-patches/0013-Support-Debian-DKMS-builds.patch | 52 ++
...ool-reopen-should-detect-expanded-devices.patch | 376 +++++++++
...Add-pool-state-proc-entry-SUSPENDED-pools.patch | 686 ++++++++++++++++
...016-Linux-4.14-compat-blk_queue_stackable.patch | 115 +++
...efault-ashift-for-Amazon-EC2-NVMe-devices.patch | 54 ++
...18-Fix-kernel-unaligned-access-on-sparc64.patch | 123 +++
...ock.patch => 0019-Fix-zpl_mount-deadlock.patch} | 1 -
...6-uts-illumos-rootfs-should-support-salte.patch | 133 ++++
...-zfs-incremental-send-remove-o-properties.patch | 108 +++
...nherited-properties-in-zfs_check_settable.patch | 95 +++
...rcstat.py-handling-of-unsupported-options.patch | 33 +
.../0024-Don-t-modify-argv-in-user-tools.patch | 123 +++
...5-Add-missing-zfs-dracut-RPM-dependencies.patch | 42 +
.../0026-Add-libaio-devel-BuildRequires.patch | 31 +
...devel-requirement-for-Debian-based-distri.patch | 36 +
...8-Fix-misc-bounds-check-compiler-warnings.patch | 61 ++
...Fix-problems-receiving-reallocated-dnodes.patch | 556 +++++++++++++
...ix-object-reclaim-when-using-large-dnodes.patch | 134 ++++
...-zfs-recv-of-non-large_dnode-send-streams.patch | 124 +++
...x-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch | 42 +
...33-Correctly-handle-errors-from-kern_path.patch | 35 +
zfs-patches/0034-Tag-zfs-0.7.10.patch | 56 ++
zfs-patches/series | 33 +-
32 files changed, 6046 insertions(+), 3 deletions(-)
rename zfs-patches/{0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch => 0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch} (100%)
create mode 100644 zfs-patches/0005-zv_suspend_lock-in-zvol_open-zvol_release.patch
create mode 100644 zfs-patches/0006-Linux-4.18-compat-inode-timespec-timespec64.patch
create mode 100644 zfs-patches/0007-Linux-compat-4.18-check_disk_size_change.patch
create mode 100644 zfs-patches/0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch
create mode 100644 zfs-patches/0009-Fix-divide-by-zero-in-mmp_delay_update.patch
create mode 100644 zfs-patches/0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch
create mode 100644 zfs-patches/0011-Trim-new-line-from-zfs_vdev_scheduler.patch
create mode 100644 zfs-patches/0012-module-param-callbacks-check-for-initialized-spa.patch
create mode 100644 zfs-patches/0013-Support-Debian-DKMS-builds.patch
create mode 100644 zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch
create mode 100644 zfs-patches/0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch
create mode 100644 zfs-patches/0016-Linux-4.14-compat-blk_queue_stackable.patch
create mode 100644 zfs-patches/0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch
create mode 100644 zfs-patches/0018-Fix-kernel-unaligned-access-on-sparc64.patch
rename zfs-patches/{0004-Fix-zpl_mount-deadlock.patch => 0019-Fix-zpl_mount-deadlock.patch} (97%)
create mode 100644 zfs-patches/0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch
create mode 100644 zfs-patches/0021-Fix-zfs-incremental-send-remove-o-properties.patch
create mode 100644 zfs-patches/0022-Allow-inherited-properties-in-zfs_check_settable.patch
create mode 100644 zfs-patches/0023-Fix-arcstat.py-handling-of-unsupported-options.patch
create mode 100644 zfs-patches/0024-Don-t-modify-argv-in-user-tools.patch
create mode 100644 zfs-patches/0025-Add-missing-zfs-dracut-RPM-dependencies.patch
create mode 100644 zfs-patches/0026-Add-libaio-devel-BuildRequires.patch
create mode 100644 zfs-patches/0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch
create mode 100644 zfs-patches/0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch
create mode 100644 zfs-patches/0029-Fix-problems-receiving-reallocated-dnodes.patch
create mode 100644 zfs-patches/0030-Fix-object-reclaim-when-using-large-dnodes.patch
create mode 100644 zfs-patches/0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch
create mode 100644 zfs-patches/0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch
create mode 100644 zfs-patches/0033-Correctly-handle-errors-from-kern_path.patch
create mode 100644 zfs-patches/0034-Tag-zfs-0.7.10.patch
diff --git a/zfs-patches/0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch b/zfs-patches/0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
similarity index 100%
rename from zfs-patches/0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
rename to zfs-patches/0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
diff --git a/zfs-patches/0005-zv_suspend_lock-in-zvol_open-zvol_release.patch b/zfs-patches/0005-zv_suspend_lock-in-zvol_open-zvol_release.patch
new file mode 100644
index 0000000..6a61f1a
--- /dev/null
+++ b/zfs-patches/0005-zv_suspend_lock-in-zvol_open-zvol_release.patch
@@ -0,0 +1,124 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Boris Protopopov <bprotopopov at users.noreply.github.com>
+Date: Wed, 9 Aug 2017 14:10:47 -0400
+Subject: [PATCH] zv_suspend_lock in zvol_open()/zvol_release()
+
+Acquire zv_suspend_lock on first open and last close only.
+
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: Boris Protopopov <boris.protopopov at actifio.com>
+Closes #6342
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/zvol.c | 64 +++++++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 41 insertions(+), 23 deletions(-)
+
+diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
+index 3e7059b3..ffa5fac7 100644
+--- a/module/zfs/zvol.c
++++ b/module/zfs/zvol.c
+@@ -1347,9 +1347,9 @@ zvol_open(struct block_device *bdev, fmode_t flag)
+ {
+ zvol_state_t *zv;
+ int error = 0;
+- boolean_t drop_suspend = B_FALSE;
++ boolean_t drop_suspend = B_TRUE;
+
+- ASSERT(!mutex_owned(&zvol_state_lock));
++ ASSERT(!MUTEX_HELD(&zvol_state_lock));
+
+ mutex_enter(&zvol_state_lock);
+ /*
+@@ -1364,23 +1364,31 @@ zvol_open(struct block_device *bdev, fmode_t flag)
+ return (SET_ERROR(-ENXIO));
+ }
+
+- /* take zv_suspend_lock before zv_state_lock */
+- rw_enter(&zv->zv_suspend_lock, RW_READER);
+-
+ mutex_enter(&zv->zv_state_lock);
+-
+ /*
+ * make sure zvol is not suspended during first open
+- * (hold zv_suspend_lock), otherwise, drop the lock
++ * (hold zv_suspend_lock) and respect proper lock acquisition
++ * ordering - zv_suspend_lock before zv_state_lock
+ */
+ if (zv->zv_open_count == 0) {
+- drop_suspend = B_TRUE;
++ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
++ mutex_exit(&zv->zv_state_lock);
++ rw_enter(&zv->zv_suspend_lock, RW_READER);
++ mutex_enter(&zv->zv_state_lock);
++ /* check to see if zv_suspend_lock is needed */
++ if (zv->zv_open_count != 0) {
++ rw_exit(&zv->zv_suspend_lock);
++ drop_suspend = B_FALSE;
++ }
++ }
+ } else {
+- rw_exit(&zv->zv_suspend_lock);
++ drop_suspend = B_FALSE;
+ }
+-
+ mutex_exit(&zvol_state_lock);
+
++ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
++ ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock));
++
+ if (zv->zv_open_count == 0) {
+ error = zvol_first_open(zv);
+ if (error)
+@@ -1417,28 +1425,38 @@ static int
+ zvol_release(struct gendisk *disk, fmode_t mode)
+ {
+ zvol_state_t *zv;
+- boolean_t drop_suspend = B_FALSE;
++ boolean_t drop_suspend = B_TRUE;
+
+- ASSERT(!mutex_owned(&zvol_state_lock));
++ ASSERT(!MUTEX_HELD(&zvol_state_lock));
+
+ mutex_enter(&zvol_state_lock);
+ zv = disk->private_data;
+- ASSERT(zv && zv->zv_open_count > 0);
+-
+- /* take zv_suspend_lock before zv_state_lock */
+- rw_enter(&zv->zv_suspend_lock, RW_READER);
+
+ mutex_enter(&zv->zv_state_lock);
+- mutex_exit(&zvol_state_lock);
+-
++ ASSERT(zv->zv_open_count > 0);
+ /*
+ * make sure zvol is not suspended during last close
+- * (hold zv_suspend_lock), otherwise, drop the lock
++ * (hold zv_suspend_lock) and respect proper lock acquisition
++ * ordering - zv_suspend_lock before zv_state_lock
+ */
+- if (zv->zv_open_count == 1)
+- drop_suspend = B_TRUE;
+- else
+- rw_exit(&zv->zv_suspend_lock);
++ if (zv->zv_open_count == 1) {
++ if (!rw_tryenter(&zv->zv_suspend_lock, RW_READER)) {
++ mutex_exit(&zv->zv_state_lock);
++ rw_enter(&zv->zv_suspend_lock, RW_READER);
++ mutex_enter(&zv->zv_state_lock);
++ /* check to see if zv_suspend_lock is needed */
++ if (zv->zv_open_count != 1) {
++ rw_exit(&zv->zv_suspend_lock);
++ drop_suspend = B_FALSE;
++ }
++ }
++ } else {
++ drop_suspend = B_FALSE;
++ }
++ mutex_exit(&zvol_state_lock);
++
++ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
++ ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock));
+
+ zv->zv_open_count--;
+ if (zv->zv_open_count == 0)
diff --git a/zfs-patches/0006-Linux-4.18-compat-inode-timespec-timespec64.patch b/zfs-patches/0006-Linux-4.18-compat-inode-timespec-timespec64.patch
new file mode 100644
index 0000000..5738b0c
--- /dev/null
+++ b/zfs-patches/0006-Linux-4.18-compat-inode-timespec-timespec64.patch
@@ -0,0 +1,560 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1 at llnl.gov>
+Date: Sun, 12 Aug 2018 18:22:03 -0400
+Subject: [PATCH] Linux 4.18 compat: inode timespec -> timespec64
+
+Commit torvalds/linux at 95582b0 changes the inode i_atime, i_mtime,
+and i_ctime members from timespec's to timespec64's to make them
+2038 safe. As part of this change the current_time() function was
+also updated to return the timespec64 type.
+
+Resolve this issue by introducing a new inode_timespec_t type which
+is defined to match the timespec type used by the inode. It should
+be used when working with inode timestamps to ensure matching types.
+
+The timestruc_t type under Illumos was used in a similar fashion but
+was specified to always be a timespec_t. Rather than incorrectly
+define this type all timespec_t types have been replaced by the new
+inode_timespec_t type.
+
+Finally, the kernel and user space 'sys/time.h' headers were aligned
+with each other. They define as appropriate for the context several
+constants as macros and include static inline implementation of
+gethrestime(), gethrestime_sec(), and gethrtime().
+
+Reviewed-by: Chunwei Chen <tuxoko at gmail.com>
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Closes #7643
+Backported-by: Richard Yao <ryao at gentoo.org>
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ config/kernel-current-time.m4 | 7 +++----
+ include/sys/dmu.h | 2 +-
+ include/sys/dmu_objset.h | 2 +-
+ include/sys/dsl_dir.h | 4 ++--
+ include/sys/spa_impl.h | 2 +-
+ include/sys/xvattr.h | 2 +-
+ include/sys/zfs_context.h | 9 +--------
+ include/sys/zfs_znode.h | 33 +++++++++++++++++++++++--------
+ include/sys/zpl.h | 9 +++++++++
+ lib/libspl/Makefile.am | 2 --
+ lib/libspl/gethrestime.c | 38 ------------------------------------
+ lib/libspl/gethrtime.c | 45 -------------------------------------------
+ lib/libspl/include/sys/time.h | 37 +++++++++++++++++++++++++++--------
+ lib/libzpool/kernel.c | 4 ++--
+ module/zfs/dmu_objset.c | 2 +-
+ module/zfs/dsl_dir.c | 6 +++---
+ module/zfs/fm.c | 2 +-
+ module/zfs/zfs_ctldir.c | 2 +-
+ module/zfs/zfs_vnops.c | 4 ++--
+ module/zfs/zfs_znode.c | 4 ++--
+ module/zfs/zpl_inode.c | 5 +++--
+ 21 files changed, 88 insertions(+), 133 deletions(-)
+ delete mode 100644 lib/libspl/gethrestime.c
+ delete mode 100644 lib/libspl/gethrtime.c
+
+diff --git a/config/kernel-current-time.m4 b/config/kernel-current-time.m4
+index 2ede9ff3..c7d5c9b5 100644
+--- a/config/kernel-current-time.m4
++++ b/config/kernel-current-time.m4
+@@ -1,15 +1,14 @@
+ dnl #
+ dnl # 4.9, current_time() added
++dnl # 4.18, return type changed from timespec to timespec64
+ dnl #
+ AC_DEFUN([ZFS_AC_KERNEL_CURRENT_TIME],
+ [AC_MSG_CHECKING([whether current_time() exists])
+ ZFS_LINUX_TRY_COMPILE_SYMBOL([
+ #include <linux/fs.h>
+ ], [
+- struct inode ip;
+- struct timespec now __attribute__ ((unused));
+-
+- now = current_time(&ip);
++ struct inode ip __attribute__ ((unused));
++ ip.i_atime = current_time(&ip);
+ ], [current_time], [fs/inode.c], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_CURRENT_TIME, 1, [current_time() exists])
+diff --git a/include/sys/dmu.h b/include/sys/dmu.h
+index bcdf7d64..755a9056 100644
+--- a/include/sys/dmu.h
++++ b/include/sys/dmu.h
+@@ -891,7 +891,7 @@ uint64_t dmu_objset_fsid_guid(objset_t *os);
+ /*
+ * Get the [cm]time for an objset's snapshot dir
+ */
+-timestruc_t dmu_objset_snap_cmtime(objset_t *os);
++inode_timespec_t dmu_objset_snap_cmtime(objset_t *os);
+
+ int dmu_objset_is_snapshot(objset_t *os);
+
+diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
+index a836e037..531e81d4 100644
+--- a/include/sys/dmu_objset.h
++++ b/include/sys/dmu_objset.h
+@@ -179,7 +179,7 @@ int dmu_objset_find_dp(struct dsl_pool *dp, uint64_t ddobj,
+ int func(struct dsl_pool *, struct dsl_dataset *, void *),
+ void *arg, int flags);
+ void dmu_objset_evict_dbufs(objset_t *os);
+-timestruc_t dmu_objset_snap_cmtime(objset_t *os);
++inode_timespec_t dmu_objset_snap_cmtime(objset_t *os);
+
+ /* called from dsl */
+ void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
+diff --git a/include/sys/dsl_dir.h b/include/sys/dsl_dir.h
+index 69b0b6a5..80e83fdc 100644
+--- a/include/sys/dsl_dir.h
++++ b/include/sys/dsl_dir.h
+@@ -103,7 +103,7 @@ struct dsl_dir {
+ /* Protected by dd_lock */
+ kmutex_t dd_lock;
+ list_t dd_props; /* list of dsl_prop_record_t's */
+- timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */
++ inode_timespec_t dd_snap_cmtime; /* last snapshot namespace change */
+ uint64_t dd_origin_txg;
+
+ /* gross estimate of space used by in-flight tx's */
+@@ -159,7 +159,7 @@ boolean_t dsl_dir_is_clone(dsl_dir_t *dd);
+ void dsl_dir_new_refreservation(dsl_dir_t *dd, struct dsl_dataset *ds,
+ uint64_t reservation, cred_t *cr, dmu_tx_t *tx);
+ void dsl_dir_snap_cmtime_update(dsl_dir_t *dd);
+-timestruc_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
++inode_timespec_t dsl_dir_snap_cmtime(dsl_dir_t *dd);
+ void dsl_dir_set_reservation_sync_impl(dsl_dir_t *dd, uint64_t value,
+ dmu_tx_t *tx);
+ void dsl_dir_zapify(dsl_dir_t *dd, dmu_tx_t *tx);
+diff --git a/include/sys/spa_impl.h b/include/sys/spa_impl.h
+index b1e78c1d..fa7490ac 100644
+--- a/include/sys/spa_impl.h
++++ b/include/sys/spa_impl.h
+@@ -153,7 +153,7 @@ struct spa {
+ uint64_t spa_freeze_txg; /* freeze pool at this txg */
+ uint64_t spa_load_max_txg; /* best initial ub_txg */
+ uint64_t spa_claim_max_txg; /* highest claimed birth txg */
+- timespec_t spa_loaded_ts; /* 1st successful open time */
++ inode_timespec_t spa_loaded_ts; /* 1st successful open time */
+ objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
+ kmutex_t spa_evicting_os_lock; /* Evicting objset list lock */
+ list_t spa_evicting_os_list; /* Objsets being evicted. */
+diff --git a/include/sys/xvattr.h b/include/sys/xvattr.h
+index 4779b632..5d38927c 100644
+--- a/include/sys/xvattr.h
++++ b/include/sys/xvattr.h
+@@ -47,7 +47,7 @@
+ * Structure of all optional attributes.
+ */
+ typedef struct xoptattr {
+- timestruc_t xoa_createtime; /* Create time of file */
++ inode_timespec_t xoa_createtime; /* Create time of file */
+ uint8_t xoa_archive;
+ uint8_t xoa_system;
+ uint8_t xoa_readonly;
+diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h
+index 4fe35342..68c58f95 100644
+--- a/include/sys/zfs_context.h
++++ b/include/sys/zfs_context.h
+@@ -527,7 +527,7 @@ extern char *vn_dumpdir;
+ #define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
+
+ typedef struct xoptattr {
+- timestruc_t xoa_createtime; /* Create time of file */
++ inode_timespec_t xoa_createtime; /* Create time of file */
+ uint8_t xoa_archive;
+ uint8_t xoa_system;
+ uint8_t xoa_readonly;
+@@ -640,13 +640,6 @@ extern void delay(clock_t ticks);
+ #define USEC_TO_TICK(usec) ((usec) / (MICROSEC / hz))
+ #define NSEC_TO_TICK(usec) ((usec) / (NANOSEC / hz))
+
+-#define gethrestime_sec() time(NULL)
+-#define gethrestime(t) \
+- do {\
+- (t)->tv_sec = gethrestime_sec();\
+- (t)->tv_nsec = 0;\
+- } while (0);
+-
+ #define max_ncpus 64
+ #define boot_ncpus (sysconf(_SC_NPROCESSORS_ONLN))
+
+diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
+index c292f037..26d1eb37 100644
+--- a/include/sys/zfs_znode.h
++++ b/include/sys/zfs_znode.h
+@@ -270,19 +270,36 @@ typedef struct znode_hold {
+
+ extern unsigned int zfs_object_mutex_size;
+
+-/* Encode ZFS stored time values from a struct timespec */
++/*
++ * Encode ZFS stored time values from a struct timespec / struct timespec64.
++ */
+ #define ZFS_TIME_ENCODE(tp, stmp) \
+-{ \
++do { \
+ (stmp)[0] = (uint64_t)(tp)->tv_sec; \
+ (stmp)[1] = (uint64_t)(tp)->tv_nsec; \
+-}
++} while (0)
+
+-/* Decode ZFS stored time values to a struct timespec */
++#if defined(HAVE_INODE_TIMESPEC64_TIMES)
++/*
++ * Decode ZFS stored time values to a struct timespec64
++ * 4.18 and newer kernels.
++ */
+ #define ZFS_TIME_DECODE(tp, stmp) \
+-{ \
+- (tp)->tv_sec = (time_t)(stmp)[0]; \
+- (tp)->tv_nsec = (long)(stmp)[1]; \
+-}
++do { \
++ (tp)->tv_sec = (time64_t)(stmp)[0]; \
++ (tp)->tv_nsec = (long)(stmp)[1]; \
++} while (0)
++#else
++/*
++ * Decode ZFS stored time values to a struct timespec
++ * 4.17 and older kernels.
++ */
++#define ZFS_TIME_DECODE(tp, stmp) \
++do { \
++ (tp)->tv_sec = (time_t)(stmp)[0]; \
++ (tp)->tv_nsec = (long)(stmp)[1]; \
++} while (0)
++#endif /* HAVE_INODE_TIMESPEC64_TIMES */
+
+ /*
+ * Timestamp defines
+diff --git a/include/sys/zpl.h b/include/sys/zpl.h
+index 65ed4313..e433fbc6 100644
+--- a/include/sys/zpl.h
++++ b/include/sys/zpl.h
+@@ -189,4 +189,13 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
+ }
+ #endif /* HAVE_VFS_ITERATE */
+
++/*
++ * Linux 4.18, inode times converted from timespec to timespec64.
++ */
++#if defined(HAVE_INODE_TIMESPEC64_TIMES)
++#define zpl_inode_timespec_trunc(ts, gran) timespec64_trunc(ts, gran)
++#else
++#define zpl_inode_timespec_trunc(ts, gran) timespec_trunc(ts, gran)
++#endif
++
+ #endif /* _SYS_ZPL_H */
+diff --git a/lib/libspl/Makefile.am b/lib/libspl/Makefile.am
+index 59bc8ffb..a6e63cb8 100644
+--- a/lib/libspl/Makefile.am
++++ b/lib/libspl/Makefile.am
+@@ -19,8 +19,6 @@ noinst_LTLIBRARIES = libspl.la
+
+ USER_C = \
+ getexecname.c \
+- gethrtime.c \
+- gethrestime.c \
+ getmntany.c \
+ list.c \
+ mkdirp.c \
+diff --git a/lib/libspl/gethrestime.c b/lib/libspl/gethrestime.c
+deleted file mode 100644
+index d37cc2d5..00000000
+--- a/lib/libspl/gethrestime.c
++++ /dev/null
+@@ -1,38 +0,0 @@
+-/*
+- * CDDL HEADER START
+- *
+- * The contents of this file are subject to the terms of the
+- * Common Development and Distribution License (the "License").
+- * You may not use this file except in compliance with the License.
+- *
+- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+- * or http://www.opensolaris.org/os/licensing.
+- * See the License for the specific language governing permissions
+- * and limitations under the License.
+- *
+- * When distributing Covered Code, include this CDDL HEADER in each
+- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+- * If applicable, add the following below this CDDL HEADER, with the
+- * fields enclosed by brackets "[]" replaced with your own identifying
+- * information: Portions Copyright [yyyy] [name of copyright owner]
+- *
+- * CDDL HEADER END
+- */
+-
+-/*
+- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+- * Use is subject to license terms.
+- */
+-
+-#include <time.h>
+-#include <sys/time.h>
+-
+-void
+-gethrestime(timestruc_t *ts)
+-{
+- struct timeval tv;
+-
+- gettimeofday(&tv, NULL);
+- ts->tv_sec = tv.tv_sec;
+- ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC;
+-}
+diff --git a/lib/libspl/gethrtime.c b/lib/libspl/gethrtime.c
+deleted file mode 100644
+index 95ceb18e..00000000
+--- a/lib/libspl/gethrtime.c
++++ /dev/null
+@@ -1,45 +0,0 @@
+-/*
+- * CDDL HEADER START
+- *
+- * The contents of this file are subject to the terms of the
+- * Common Development and Distribution License (the "License").
+- * You may not use this file except in compliance with the License.
+- *
+- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+- * or http://www.opensolaris.org/os/licensing.
+- * See the License for the specific language governing permissions
+- * and limitations under the License.
+- *
+- * When distributing Covered Code, include this CDDL HEADER in each
+- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+- * If applicable, add the following below this CDDL HEADER, with the
+- * fields enclosed by brackets "[]" replaced with your own identifying
+- * information: Portions Copyright [yyyy] [name of copyright owner]
+- *
+- * CDDL HEADER END
+- */
+-
+-/*
+- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+- * Use is subject to license terms.
+- */
+-
+-#include <time.h>
+-#include <sys/time.h>
+-#include <stdlib.h>
+-#include <stdio.h>
+-
+-hrtime_t
+-gethrtime(void)
+-{
+- struct timespec ts;
+- int rc;
+-
+- rc = clock_gettime(CLOCK_MONOTONIC, &ts);
+- if (rc) {
+- fprintf(stderr, "Error: clock_gettime() = %d\n", rc);
+- abort();
+- }
+-
+- return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec);
+-}
+diff --git a/lib/libspl/include/sys/time.h b/lib/libspl/include/sys/time.h
+index dc645fa5..04b3ba87 100644
+--- a/lib/libspl/include/sys/time.h
++++ b/lib/libspl/include/sys/time.h
+@@ -27,8 +27,9 @@
+ #ifndef _LIBSPL_SYS_TIME_H
+ #define _LIBSPL_SYS_TIME_H
+
+-#include_next <sys/time.h>
++#include <time.h>
+ #include <sys/types.h>
++#include_next <sys/time.h>
+
+ #ifndef SEC
+ #define SEC 1
+@@ -70,13 +71,33 @@
+ #define SEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / SEC))
+ #endif
+
+-
+ typedef long long hrtime_t;
+-typedef struct timespec timestruc_t;
+-typedef struct timespec timespec_t;
+-
+-
+-extern hrtime_t gethrtime(void);
+-extern void gethrestime(timestruc_t *);
++typedef struct timespec timespec_t;
++typedef struct timespec inode_timespec_t;
++
++static inline void
++gethrestime(inode_timespec_t *ts)
++{
++ struct timeval tv;
++ (void) gettimeofday(&tv, NULL);
++ ts->tv_sec = tv.tv_sec;
++ ts->tv_nsec = tv.tv_usec * NSEC_PER_USEC;
++}
++
++static inline time_t
++gethrestime_sec(void)
++{
++ struct timeval tv;
++ (void) gettimeofday(&tv, NULL);
++ return (tv.tv_sec);
++}
++
++static inline hrtime_t
++gethrtime(void)
++{
++ struct timespec ts;
++ (void) clock_gettime(CLOCK_MONOTONIC, &ts);
++ return ((((u_int64_t)ts.tv_sec) * NANOSEC) + ts.tv_nsec);
++}
+
+ #endif /* _LIBSPL_SYS_TIME_H */
+diff --git a/lib/libzpool/kernel.c b/lib/libzpool/kernel.c
+index e67d13c9..3ea8778b 100644
+--- a/lib/libzpool/kernel.c
++++ b/lib/libzpool/kernel.c
+@@ -498,7 +498,7 @@ cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime)
+ {
+ int error;
+ struct timeval tv;
+- timestruc_t ts;
++ struct timespec ts;
+ clock_t delta;
+
+ ASSERT3U(cv->cv_magic, ==, CV_MAGIC);
+@@ -536,7 +536,7 @@ cv_timedwait_hires(kcondvar_t *cv, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+ {
+ int error;
+ struct timeval tv;
+- timestruc_t ts;
++ struct timespec ts;
+ hrtime_t delta;
+
+ ASSERT(flag == 0 || flag == CALLOUT_FLAG_ABSOLUTE);
+diff --git a/module/zfs/dmu_objset.c b/module/zfs/dmu_objset.c
+index 3425d542..449ebedf 100644
+--- a/module/zfs/dmu_objset.c
++++ b/module/zfs/dmu_objset.c
+@@ -860,7 +860,7 @@ dmu_objset_evict_done(objset_t *os)
+ kmem_free(os, sizeof (objset_t));
+ }
+
+-timestruc_t
++inode_timespec_t
+ dmu_objset_snap_cmtime(objset_t *os)
+ {
+ return (dsl_dir_snap_cmtime(os->os_dsl_dataset->ds_dir));
+diff --git a/module/zfs/dsl_dir.c b/module/zfs/dsl_dir.c
+index a3ef5896..deecf6bc 100644
+--- a/module/zfs/dsl_dir.c
++++ b/module/zfs/dsl_dir.c
+@@ -1975,10 +1975,10 @@ dsl_dir_transfer_possible(dsl_dir_t *sdd, dsl_dir_t *tdd,
+ return (0);
+ }
+
+-timestruc_t
++inode_timespec_t
+ dsl_dir_snap_cmtime(dsl_dir_t *dd)
+ {
+- timestruc_t t;
++ inode_timespec_t t;
+
+ mutex_enter(&dd->dd_lock);
+ t = dd->dd_snap_cmtime;
+@@ -1990,7 +1990,7 @@ dsl_dir_snap_cmtime(dsl_dir_t *dd)
+ void
+ dsl_dir_snap_cmtime_update(dsl_dir_t *dd)
+ {
+- timestruc_t t;
++ inode_timespec_t t;
+
+ gethrestime(&t);
+ mutex_enter(&dd->dd_lock);
+diff --git a/module/zfs/fm.c b/module/zfs/fm.c
+index cb148149..9d26cc99 100644
+--- a/module/zfs/fm.c
++++ b/module/zfs/fm.c
+@@ -508,8 +508,8 @@ zfs_zevent_insert(zevent_t *ev)
+ int
+ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
+ {
++ inode_timespec_t tv;
+ int64_t tv_array[2];
+- timestruc_t tv;
+ uint64_t eid;
+ size_t nvl_size = 0;
+ zevent_t *ev;
+diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
+index 14af55c4..25edea78 100644
+--- a/module/zfs/zfs_ctldir.c
++++ b/module/zfs/zfs_ctldir.c
+@@ -449,7 +449,7 @@ static struct inode *
+ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
+ const struct file_operations *fops, const struct inode_operations *ops)
+ {
+- struct timespec now;
++ inode_timespec_t now;
+ struct inode *ip;
+ znode_t *zp;
+
+diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
+index 0d2b61a1..34ea751c 100644
+--- a/module/zfs/zfs_vnops.c
++++ b/module/zfs/zfs_vnops.c
+@@ -3158,7 +3158,7 @@ top:
+
+ if (mask & (ATTR_MTIME | ATTR_SIZE)) {
+ ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
+- ZTOI(zp)->i_mtime = timespec_trunc(vap->va_mtime,
++ ZTOI(zp)->i_mtime = zpl_inode_timespec_trunc(vap->va_mtime,
+ ZTOI(zp)->i_sb->s_time_gran);
+
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+@@ -3167,7 +3167,7 @@ top:
+
+ if (mask & (ATTR_CTIME | ATTR_SIZE)) {
+ ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
+- ZTOI(zp)->i_ctime = timespec_trunc(vap->va_ctime,
++ ZTOI(zp)->i_ctime = zpl_inode_timespec_trunc(vap->va_ctime,
+ ZTOI(zp)->i_sb->s_time_gran);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+ ctime, sizeof (ctime));
+diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c
+index f508a248..e222c791 100644
+--- a/module/zfs/zfs_znode.c
++++ b/module/zfs/zfs_znode.c
+@@ -700,7 +700,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
+ uint64_t rdev = 0;
+ zfsvfs_t *zfsvfs = ZTOZSB(dzp);
+ dmu_buf_t *db;
+- timestruc_t now;
++ inode_timespec_t now;
+ uint64_t gen, obj;
+ int bonuslen;
+ int dnodesize;
+@@ -1349,7 +1349,7 @@ void
+ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
+ uint64_t ctime[2])
+ {
+- timestruc_t now;
++ inode_timespec_t now;
+
+ gethrestime(&now);
+
+diff --git a/module/zfs/zpl_inode.c b/module/zfs/zpl_inode.c
+index 3b5643d0..41b91cab 100644
+--- a/module/zfs/zpl_inode.c
++++ b/module/zfs/zpl_inode.c
+@@ -384,9 +384,10 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
+ vap->va_mtime = ia->ia_mtime;
+ vap->va_ctime = ia->ia_ctime;
+
+- if (vap->va_mask & ATTR_ATIME)
+- ip->i_atime = timespec_trunc(ia->ia_atime,
++ if (vap->va_mask & ATTR_ATIME) {
++ ip->i_atime = zpl_inode_timespec_trunc(ia->ia_atime,
+ ip->i_sb->s_time_gran);
++ }
+
+ cookie = spl_fstrans_mark();
+ error = -zfs_setattr(ip, vap, 0, cr);
diff --git a/zfs-patches/0007-Linux-compat-4.18-check_disk_size_change.patch b/zfs-patches/0007-Linux-compat-4.18-check_disk_size_change.patch
new file mode 100644
index 0000000..e75a02c
--- /dev/null
+++ b/zfs-patches/0007-Linux-compat-4.18-check_disk_size_change.patch
@@ -0,0 +1,808 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1 at llnl.gov>
+Date: Fri, 15 Jun 2018 15:05:21 -0700
+Subject: [PATCH] Linux compat 4.18: check_disk_size_change()
+
+Added support for the bops->check_events() interface which was
+added in the 2.6.38 kernel to replace bops->media_changed().
+Fully implementing this functionality allows the volume resize
+code to rely on revalidate_disk(), which is the preferred
+mechanism, and removes the need to use check_disk_size_change().
+
+In order for bops->check_events() to lookup the zvol_state_t
+stored in the disk->private_data the zvol_state_lock needs to
+be held. Since the check events interface may poll, the mutex
+has been converted to a rwlock for better concurrency. The
+rwlock need only be taken as a writer in the zvol_free() path
+when disk->private_data is set to NULL.
+
+The configure checks for the block_device_operations structure
+were consolidated in a single kernel-block-device-operations.m4
+file.
+
+The ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS configure checks
+and associated dead code were removed. This interface was added
+to the 2.6.28 kernel which predates the oldest supported 2.6.32
+kernel and will therefore always be available.
+
+Updated maximum Linux version in META file. The 4.17 kernel
+was released on 2018-06-03 and ZoL is compatible with the
+finalized kernel.
+
+Reviewed-by: Boris Protopopov <boris.protopopov at actifio.com>
+Reviewed-by: Sara Hartse <sara.hartse at delphix.com>
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Closes #7611
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ config/kernel-bdev-block-device-operations.m4 | 34 ---
+ .../kernel-block-device-operations-release-void.m4 | 29 ---
+ config/kernel-block-device-operations.m4 | 57 +++++
+ config/kernel.m4 | 2 +-
+ include/linux/blkdev_compat.h | 1 +
+ module/zfs/zvol.c | 259 +++++++++------------
+ 6 files changed, 174 insertions(+), 208 deletions(-)
+ delete mode 100644 config/kernel-bdev-block-device-operations.m4
+ delete mode 100644 config/kernel-block-device-operations-release-void.m4
+ create mode 100644 config/kernel-block-device-operations.m4
+
+diff --git a/config/kernel-bdev-block-device-operations.m4 b/config/kernel-bdev-block-device-operations.m4
+deleted file mode 100644
+index faacc195..00000000
+--- a/config/kernel-bdev-block-device-operations.m4
++++ /dev/null
+@@ -1,34 +0,0 @@
+-dnl #
+-dnl # 2.6.x API change
+-dnl #
+-AC_DEFUN([ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS], [
+- AC_MSG_CHECKING([block device operation prototypes])
+- tmp_flags="$EXTRA_KCFLAGS"
+- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
+- ZFS_LINUX_TRY_COMPILE([
+- #include <linux/blkdev.h>
+-
+- int blk_open(struct block_device *bdev, fmode_t mode)
+- { return 0; }
+- int blk_ioctl(struct block_device *bdev, fmode_t mode,
+- unsigned x, unsigned long y) { return 0; }
+- int blk_compat_ioctl(struct block_device * bdev, fmode_t mode,
+- unsigned x, unsigned long y) { return 0; }
+-
+- static const struct block_device_operations
+- bops __attribute__ ((unused)) = {
+- .open = blk_open,
+- .release = NULL,
+- .ioctl = blk_ioctl,
+- .compat_ioctl = blk_compat_ioctl,
+- };
+- ],[
+- ],[
+- AC_MSG_RESULT(struct block_device)
+- AC_DEFINE(HAVE_BDEV_BLOCK_DEVICE_OPERATIONS, 1,
+- [struct block_device_operations use bdevs])
+- ],[
+- AC_MSG_RESULT(struct inode)
+- ])
+- EXTRA_KCFLAGS="$tmp_flags"
+-])
+diff --git a/config/kernel-block-device-operations-release-void.m4 b/config/kernel-block-device-operations-release-void.m4
+deleted file mode 100644
+index a73f8587..00000000
+--- a/config/kernel-block-device-operations-release-void.m4
++++ /dev/null
+@@ -1,29 +0,0 @@
+-dnl #
+-dnl # 3.10.x API change
+-dnl #
+-AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
+- AC_MSG_CHECKING([whether block_device_operations.release is void])
+- tmp_flags="$EXTRA_KCFLAGS"
+- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
+- ZFS_LINUX_TRY_COMPILE([
+- #include <linux/blkdev.h>
+-
+- void blk_release(struct gendisk *g, fmode_t mode) { return; }
+-
+- static const struct block_device_operations
+- bops __attribute__ ((unused)) = {
+- .open = NULL,
+- .release = blk_release,
+- .ioctl = NULL,
+- .compat_ioctl = NULL,
+- };
+- ],[
+- ],[
+- AC_MSG_RESULT(void)
+- AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1,
+- [struct block_device_operations.release returns void])
+- ],[
+- AC_MSG_RESULT(int)
+- ])
+- EXTRA_KCFLAGS="$tmp_flags"
+-])
+diff --git a/config/kernel-block-device-operations.m4 b/config/kernel-block-device-operations.m4
+new file mode 100644
+index 00000000..5f2811c1
+--- /dev/null
++++ b/config/kernel-block-device-operations.m4
+@@ -0,0 +1,57 @@
++dnl #
++dnl # 2.6.38 API change
++dnl #
++AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS], [
++ AC_MSG_CHECKING([whether bops->check_events() exists])
++ tmp_flags="$EXTRA_KCFLAGS"
++ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
++ ZFS_LINUX_TRY_COMPILE([
++ #include <linux/blkdev.h>
++
++ unsigned int blk_check_events(struct gendisk *disk,
++ unsigned int clearing) { return (0); }
++
++ static const struct block_device_operations
++ bops __attribute__ ((unused)) = {
++ .check_events = blk_check_events,
++ };
++ ],[
++ ],[
++ AC_MSG_RESULT(yes)
++ AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS, 1,
++ [bops->check_events() exists])
++ ],[
++ AC_MSG_RESULT(no)
++ ])
++ EXTRA_KCFLAGS="$tmp_flags"
++])
++
++dnl #
++dnl # 3.10.x API change
++dnl #
++AC_DEFUN([ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID], [
++ AC_MSG_CHECKING([whether bops->release() is void])
++ tmp_flags="$EXTRA_KCFLAGS"
++ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
++ ZFS_LINUX_TRY_COMPILE([
++ #include <linux/blkdev.h>
++
++ void blk_release(struct gendisk *g, fmode_t mode) { return; }
++
++ static const struct block_device_operations
++ bops __attribute__ ((unused)) = {
++ .open = NULL,
++ .release = blk_release,
++ .ioctl = NULL,
++ .compat_ioctl = NULL,
++ };
++ ],[
++ ],[
++ AC_MSG_RESULT(void)
++ AC_DEFINE(HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID, 1,
++ [bops->release() returns void])
++ ],[
++ AC_MSG_RESULT(int)
++ ])
++ EXTRA_KCFLAGS="$tmp_flags"
++])
+diff --git a/config/kernel.m4 b/config/kernel.m4
+index 375e4b79..c7ca260c 100644
+--- a/config/kernel.m4
++++ b/config/kernel.m4
+@@ -12,7 +12,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
+ ZFS_AC_KERNEL_CURRENT_BIO_TAIL
+ ZFS_AC_KERNEL_SUPER_USER_NS
+ ZFS_AC_KERNEL_SUBMIT_BIO
+- ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
++ ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
+ ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
+ ZFS_AC_KERNEL_TYPE_FMODE_T
+ ZFS_AC_KERNEL_3ARG_BLKDEV_GET
+diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
+index f99980ab..27f05662 100644
+--- a/include/linux/blkdev_compat.h
++++ b/include/linux/blkdev_compat.h
+@@ -32,6 +32,7 @@
+ #include <linux/blkdev.h>
+ #include <linux/elevator.h>
+ #include <linux/backing-dev.h>
++#include <linux/msdos_fs.h> /* for SECTOR_* */
+
+ #ifndef HAVE_FMODE_T
+ typedef unsigned __bitwise__ fmode_t;
+diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c
+index ffa5fac7..03f95630 100644
+--- a/module/zfs/zvol.c
++++ b/module/zfs/zvol.c
+@@ -99,7 +99,7 @@ unsigned long zvol_max_discard_blocks = 16384;
+ unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
+
+ static taskq_t *zvol_taskq;
+-static kmutex_t zvol_state_lock;
++static krwlock_t zvol_state_lock;
+ static list_t zvol_state_list;
+
+ #define ZVOL_HT_SIZE 1024
+@@ -176,17 +176,17 @@ zvol_find_by_dev(dev_t dev)
+ {
+ zvol_state_t *zv;
+
+- mutex_enter(&zvol_state_lock);
++ rw_enter(&zvol_state_lock, RW_READER);
+ for (zv = list_head(&zvol_state_list); zv != NULL;
+ zv = list_next(&zvol_state_list, zv)) {
+ mutex_enter(&zv->zv_state_lock);
+ if (zv->zv_dev == dev) {
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+ return (zv);
+ }
+ mutex_exit(&zv->zv_state_lock);
+ }
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+
+ return (NULL);
+ }
+@@ -204,7 +204,7 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
+ zvol_state_t *zv;
+ struct hlist_node *p = NULL;
+
+- mutex_enter(&zvol_state_lock);
++ rw_enter(&zvol_state_lock, RW_READER);
+ hlist_for_each(p, ZVOL_HT_HEAD(hash)) {
+ zv = hlist_entry(p, zvol_state_t, zv_hlink);
+ mutex_enter(&zv->zv_state_lock);
+@@ -227,12 +227,12 @@ zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
+ strncmp(zv->zv_name, name, MAXNAMELEN)
+ == 0);
+ }
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+ return (zv);
+ }
+ mutex_exit(&zv->zv_state_lock);
+ }
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+
+ return (NULL);
+ }
+@@ -339,24 +339,6 @@ zvol_get_stats(objset_t *os, nvlist_t *nv)
+ return (SET_ERROR(error));
+ }
+
+-static void
+-zvol_size_changed(zvol_state_t *zv, uint64_t volsize)
+-{
+- struct block_device *bdev;
+-
+- ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+-
+- bdev = bdget_disk(zv->zv_disk, 0);
+- if (bdev == NULL)
+- return;
+-
+- set_capacity(zv->zv_disk, volsize >> 9);
+- zv->zv_volsize = volsize;
+- check_disk_size_change(zv->zv_disk, bdev);
+-
+- bdput(bdev);
+-}
+-
+ /*
+ * Sanity check volume size.
+ */
+@@ -409,31 +391,17 @@ zvol_update_volsize(uint64_t volsize, objset_t *os)
+ return (error);
+ }
+
+-static int
+-zvol_update_live_volsize(zvol_state_t *zv, uint64_t volsize)
+-{
+- zvol_size_changed(zv, volsize);
+-
+- /*
+- * We should post a event here describing the expansion. However,
+- * the zfs_ereport_post() interface doesn't nicely support posting
+- * events for zvols, it assumes events relate to vdevs or zios.
+- */
+-
+- return (0);
+-}
+-
+ /*
+- * Set ZFS_PROP_VOLSIZE set entry point.
++ * Set ZFS_PROP_VOLSIZE set entry point. Note that modifying the volume
++ * size will result in a udev "change" event being generated.
+ */
+ int
+ zvol_set_volsize(const char *name, uint64_t volsize)
+ {
+- zvol_state_t *zv = NULL;
+ objset_t *os = NULL;
+- int error;
+- dmu_object_info_t *doi;
++ struct gendisk *disk = NULL;
+ uint64_t readonly;
++ int error;
+ boolean_t owned = B_FALSE;
+
+ error = dsl_prop_get_integer(name,
+@@ -443,7 +411,7 @@ zvol_set_volsize(const char *name, uint64_t volsize)
+ if (readonly)
+ return (SET_ERROR(EROFS));
+
+- zv = zvol_find_by_name(name, RW_READER);
++ zvol_state_t *zv = zvol_find_by_name(name, RW_READER);
+
+ ASSERT(zv == NULL || (MUTEX_HELD(&zv->zv_state_lock) &&
+ RW_READ_HELD(&zv->zv_suspend_lock)));
+@@ -464,16 +432,18 @@ zvol_set_volsize(const char *name, uint64_t volsize)
+ os = zv->zv_objset;
+ }
+
+- doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
++ dmu_object_info_t *doi = kmem_alloc(sizeof (*doi), KM_SLEEP);
+
+ if ((error = dmu_object_info(os, ZVOL_OBJ, doi)) ||
+ (error = zvol_check_volsize(volsize, doi->doi_data_block_size)))
+ goto out;
+
+ error = zvol_update_volsize(volsize, os);
+-
+- if (error == 0 && zv != NULL)
+- error = zvol_update_live_volsize(zv, volsize);
++ if (error == 0 && zv != NULL) {
++ zv->zv_volsize = volsize;
++ zv->zv_changed = 1;
++ disk = zv->zv_disk;
++ }
+ out:
+ kmem_free(doi, sizeof (dmu_object_info_t));
+
+@@ -488,6 +458,9 @@ out:
+ if (zv != NULL)
+ mutex_exit(&zv->zv_state_lock);
+
++ if (disk != NULL)
++ revalidate_disk(disk);
++
+ return (SET_ERROR(error));
+ }
+
+@@ -543,8 +516,8 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize)
+ if (zv == NULL)
+ return (SET_ERROR(ENXIO));
+
+- ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
+- RW_READ_HELD(&zv->zv_suspend_lock));
++ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
++ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
+
+ if (zv->zv_flags & ZVOL_RDONLY) {
+ mutex_exit(&zv->zv_state_lock);
+@@ -1120,7 +1093,7 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
+ static void
+ zvol_insert(zvol_state_t *zv)
+ {
+- ASSERT(MUTEX_HELD(&zvol_state_lock));
++ ASSERT(RW_WRITE_HELD(&zvol_state_lock));
+ ASSERT3U(MINOR(zv->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
+ list_insert_head(&zvol_state_list, zv);
+ hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
+@@ -1132,7 +1105,7 @@ zvol_insert(zvol_state_t *zv)
+ static void
+ zvol_remove(zvol_state_t *zv)
+ {
+- ASSERT(MUTEX_HELD(&zvol_state_lock));
++ ASSERT(RW_WRITE_HELD(&zvol_state_lock));
+ list_remove(&zvol_state_list, zv);
+ hlist_del(&zv->zv_hlink);
+ }
+@@ -1148,8 +1121,8 @@ zvol_setup_zv(zvol_state_t *zv)
+ uint64_t ro;
+ objset_t *os = zv->zv_objset;
+
+- ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
+- RW_LOCK_HELD(&zv->zv_suspend_lock));
++ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
++ ASSERT(RW_LOCK_HELD(&zv->zv_suspend_lock));
+
+ error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
+ if (error)
+@@ -1227,8 +1200,8 @@ zvol_suspend(const char *name)
+ return (NULL);
+
+ /* block all I/O, release in zvol_resume. */
+- ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
+- RW_WRITE_HELD(&zv->zv_suspend_lock));
++ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
++ ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
+
+ atomic_inc(&zv->zv_suspend_ref);
+
+@@ -1349,9 +1322,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
+ int error = 0;
+ boolean_t drop_suspend = B_TRUE;
+
+- ASSERT(!MUTEX_HELD(&zvol_state_lock));
+-
+- mutex_enter(&zvol_state_lock);
++ rw_enter(&zvol_state_lock, RW_READER);
+ /*
+ * Obtain a copy of private_data under the zvol_state_lock to make
+ * sure that either the result of zvol free code path setting
+@@ -1360,7 +1331,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
+ */
+ zv = bdev->bd_disk->private_data;
+ if (zv == NULL) {
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+ return (SET_ERROR(-ENXIO));
+ }
+
+@@ -1384,7 +1355,7 @@ zvol_open(struct block_device *bdev, fmode_t flag)
+ } else {
+ drop_suspend = B_FALSE;
+ }
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+ ASSERT(zv->zv_open_count != 0 || RW_READ_HELD(&zv->zv_suspend_lock));
+@@ -1402,11 +1373,18 @@ zvol_open(struct block_device *bdev, fmode_t flag)
+
+ zv->zv_open_count++;
+
++ mutex_exit(&zv->zv_state_lock);
++ if (drop_suspend)
++ rw_exit(&zv->zv_suspend_lock);
++
+ check_disk_change(bdev);
+
++ return (0);
++
+ out_open_count:
+ if (zv->zv_open_count == 0)
+ zvol_last_close(zv);
++
+ out_mutex:
+ mutex_exit(&zv->zv_state_lock);
+ if (drop_suspend)
+@@ -1427,9 +1405,7 @@ zvol_release(struct gendisk *disk, fmode_t mode)
+ zvol_state_t *zv;
+ boolean_t drop_suspend = B_TRUE;
+
+- ASSERT(!MUTEX_HELD(&zvol_state_lock));
+-
+- mutex_enter(&zvol_state_lock);
++ rw_enter(&zvol_state_lock, RW_READER);
+ zv = disk->private_data;
+
+ mutex_enter(&zv->zv_state_lock);
+@@ -1453,7 +1429,7 @@ zvol_release(struct gendisk *disk, fmode_t mode)
+ } else {
+ drop_suspend = B_FALSE;
+ }
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+ ASSERT(zv->zv_open_count != 1 || RW_READ_HELD(&zv->zv_suspend_lock));
+@@ -1479,7 +1455,7 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
+ zvol_state_t *zv = bdev->bd_disk->private_data;
+ int error = 0;
+
+- ASSERT(zv && zv->zv_open_count > 0);
++ ASSERT3U(zv->zv_open_count, >, 0);
+
+ switch (cmd) {
+ case BLKFLSBUF:
+@@ -1519,23 +1495,62 @@ zvol_compat_ioctl(struct block_device *bdev, fmode_t mode,
+ #define zvol_compat_ioctl NULL
+ #endif
+
++/*
++ * Linux 2.6.38 preferred interface.
++ */
++#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
++static unsigned int
++zvol_check_events(struct gendisk *disk, unsigned int clearing)
++{
++ unsigned int mask = 0;
++
++ rw_enter(&zvol_state_lock, RW_READER);
++
++ zvol_state_t *zv = disk->private_data;
++ if (zv != NULL) {
++ mutex_enter(&zv->zv_state_lock);
++ mask = zv->zv_changed ? DISK_EVENT_MEDIA_CHANGE : 0;
++ zv->zv_changed = 0;
++ mutex_exit(&zv->zv_state_lock);
++ }
++
++ rw_exit(&zvol_state_lock);
++
++ return (mask);
++}
++#else
+ static int zvol_media_changed(struct gendisk *disk)
+ {
++ int changed = 0;
++
++ rw_enter(&zvol_state_lock, RW_READER);
++
+ zvol_state_t *zv = disk->private_data;
++ if (zv != NULL) {
++ mutex_enter(&zv->zv_state_lock);
++ changed = zv->zv_changed;
++ zv->zv_changed = 0;
++ mutex_exit(&zv->zv_state_lock);
++ }
+
+- ASSERT(zv && zv->zv_open_count > 0);
++ rw_exit(&zvol_state_lock);
+
+- return (zv->zv_changed);
++ return (changed);
+ }
++#endif
+
+ static int zvol_revalidate_disk(struct gendisk *disk)
+ {
+- zvol_state_t *zv = disk->private_data;
++ rw_enter(&zvol_state_lock, RW_READER);
+
+- ASSERT(zv && zv->zv_open_count > 0);
++ zvol_state_t *zv = disk->private_data;
++ if (zv != NULL) {
++ mutex_enter(&zv->zv_state_lock);
++ set_capacity(zv->zv_disk, zv->zv_volsize >> SECTOR_BITS);
++ mutex_exit(&zv->zv_state_lock);
++ }
+
+- zv->zv_changed = 0;
+- set_capacity(zv->zv_disk, zv->zv_volsize >> 9);
++ rw_exit(&zvol_state_lock);
+
+ return (0);
+ }
+@@ -1552,7 +1567,7 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
+ zvol_state_t *zv = bdev->bd_disk->private_data;
+ sector_t sectors;
+
+- ASSERT(zv && zv->zv_open_count > 0);
++ ASSERT3U(zv->zv_open_count, >, 0);
+
+ sectors = get_capacity(zv->zv_disk);
+
+@@ -1585,68 +1600,20 @@ zvol_probe(dev_t dev, int *part, void *arg)
+ return (kobj);
+ }
+
+-#ifdef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS
+ static struct block_device_operations zvol_ops = {
+ .open = zvol_open,
+ .release = zvol_release,
+ .ioctl = zvol_ioctl,
+ .compat_ioctl = zvol_compat_ioctl,
+- .media_changed = zvol_media_changed,
+- .revalidate_disk = zvol_revalidate_disk,
+- .getgeo = zvol_getgeo,
+- .owner = THIS_MODULE,
+-};
+-
+-#else /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */
+-
+-static int
+-zvol_open_by_inode(struct inode *inode, struct file *file)
+-{
+- return (zvol_open(inode->i_bdev, file->f_mode));
+-}
+-
+-static int
+-zvol_release_by_inode(struct inode *inode, struct file *file)
+-{
+- return (zvol_release(inode->i_bdev->bd_disk, file->f_mode));
+-}
+-
+-static int
+-zvol_ioctl_by_inode(struct inode *inode, struct file *file,
+- unsigned int cmd, unsigned long arg)
+-{
+- if (file == NULL || inode == NULL)
+- return (SET_ERROR(-EINVAL));
+-
+- return (zvol_ioctl(inode->i_bdev, file->f_mode, cmd, arg));
+-}
+-
+-#ifdef CONFIG_COMPAT
+-static long
+-zvol_compat_ioctl_by_inode(struct file *file,
+- unsigned int cmd, unsigned long arg)
+-{
+- if (file == NULL)
+- return (SET_ERROR(-EINVAL));
+-
+- return (zvol_compat_ioctl(file->f_dentry->d_inode->i_bdev,
+- file->f_mode, cmd, arg));
+-}
++#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
++ .check_events = zvol_check_events,
+ #else
+-#define zvol_compat_ioctl_by_inode NULL
+-#endif
+-
+-static struct block_device_operations zvol_ops = {
+- .open = zvol_open_by_inode,
+- .release = zvol_release_by_inode,
+- .ioctl = zvol_ioctl_by_inode,
+- .compat_ioctl = zvol_compat_ioctl_by_inode,
+ .media_changed = zvol_media_changed,
++#endif
+ .revalidate_disk = zvol_revalidate_disk,
+ .getgeo = zvol_getgeo,
+ .owner = THIS_MODULE,
+ };
+-#endif /* HAVE_BDEV_BLOCK_DEVICE_OPERATIONS */
+
+ /*
+ * Allocate memory for a new zvol_state_t and setup the required
+@@ -1699,6 +1666,10 @@ zvol_alloc(dev_t dev, const char *name)
+ rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
+
+ zv->zv_disk->major = zvol_major;
++#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_CHECK_EVENTS
++ zv->zv_disk->events = DISK_EVENT_MEDIA_CHANGE;
++#endif
++
+ if (volmode == ZFS_VOLMODE_DEV) {
+ /*
+ * ZFS_VOLMODE_DEV disable partitioning on ZVOL devices: set
+@@ -1743,7 +1714,6 @@ zvol_free(void *arg)
+ {
+ zvol_state_t *zv = arg;
+
+- ASSERT(!MUTEX_HELD(&zvol_state_lock));
+ ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
+ ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
+ ASSERT(zv->zv_open_count == 0);
+@@ -1870,9 +1840,9 @@ out_doi:
+ kmem_free(doi, sizeof (dmu_object_info_t));
+
+ if (error == 0) {
+- mutex_enter(&zvol_state_lock);
++ rw_enter(&zvol_state_lock, RW_WRITER);
+ zvol_insert(zv);
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+ add_disk(zv->zv_disk);
+ } else {
+ ida_simple_remove(&zvol_ida, idx);
+@@ -1889,7 +1859,7 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
+ {
+ int readonly = get_disk_ro(zv->zv_disk);
+
+- ASSERT(MUTEX_HELD(&zvol_state_lock));
++ ASSERT(RW_LOCK_HELD(&zvol_state_lock));
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
+ strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
+@@ -2129,7 +2099,7 @@ zvol_remove_minors_impl(const char *name)
+ list_create(&free_list, sizeof (zvol_state_t),
+ offsetof(zvol_state_t, zv_next));
+
+- mutex_enter(&zvol_state_lock);
++ rw_enter(&zvol_state_lock, RW_WRITER);
+
+ for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
+ zv_next = list_next(&zvol_state_list, zv);
+@@ -2154,15 +2124,15 @@ zvol_remove_minors_impl(const char *name)
+ zvol_remove(zv);
+
+ /*
+- * clear this while holding zvol_state_lock so
+- * zvol_open won't open it
++ * Cleared while holding zvol_state_lock as a writer
++ * which will prevent zvol_open() from opening it.
+ */
+ zv->zv_disk->private_data = NULL;
+
+ /* Drop zv_state_lock before zvol_free() */
+ mutex_exit(&zv->zv_state_lock);
+
+- /* try parallel zv_free, if failed do it in place */
++ /* Try parallel zv_free, if failed do it in place */
+ t = taskq_dispatch(system_taskq, zvol_free, zv,
+ TQ_SLEEP);
+ if (t == TASKQID_INVALID)
+@@ -2173,11 +2143,9 @@ zvol_remove_minors_impl(const char *name)
+ mutex_exit(&zv->zv_state_lock);
+ }
+ }
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+
+- /*
+- * Drop zvol_state_lock before calling zvol_free()
+- */
++ /* Drop zvol_state_lock before calling zvol_free() */
+ while ((zv = list_head(&free_list)) != NULL) {
+ list_remove(&free_list, zv);
+ zvol_free(zv);
+@@ -2196,7 +2164,7 @@ zvol_remove_minor_impl(const char *name)
+ if (zvol_inhibit_dev)
+ return;
+
+- mutex_enter(&zvol_state_lock);
++ rw_enter(&zvol_state_lock, RW_WRITER);
+
+ for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
+ zv_next = list_next(&zvol_state_list, zv);
+@@ -2216,7 +2184,10 @@ zvol_remove_minor_impl(const char *name)
+ }
+ zvol_remove(zv);
+
+- /* clear this so zvol_open won't open it */
++ /*
++ * Cleared while holding zvol_state_lock as a writer
++ * which will prevent zvol_open() from opening it.
++ */
+ zv->zv_disk->private_data = NULL;
+
+ mutex_exit(&zv->zv_state_lock);
+@@ -2227,7 +2198,7 @@ zvol_remove_minor_impl(const char *name)
+ }
+
+ /* Drop zvol_state_lock before calling zvol_free() */
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+
+ if (zv != NULL)
+ zvol_free(zv);
+@@ -2248,7 +2219,7 @@ zvol_rename_minors_impl(const char *oldname, const char *newname)
+ oldnamelen = strlen(oldname);
+ newnamelen = strlen(newname);
+
+- mutex_enter(&zvol_state_lock);
++ rw_enter(&zvol_state_lock, RW_READER);
+
+ for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
+ zv_next = list_next(&zvol_state_list, zv);
+@@ -2276,7 +2247,7 @@ zvol_rename_minors_impl(const char *oldname, const char *newname)
+ mutex_exit(&zv->zv_state_lock);
+ }
+
+- mutex_exit(&zvol_state_lock);
++ rw_exit(&zvol_state_lock);
+ }
+
+ typedef struct zvol_snapdev_cb_arg {
+@@ -2653,7 +2624,7 @@ zvol_init(void)
+
+ list_create(&zvol_state_list, sizeof (zvol_state_t),
+ offsetof(zvol_state_t, zv_next));
+- mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
++ rw_init(&zvol_state_lock, NULL, RW_DEFAULT, NULL);
+ ida_init(&zvol_ida);
+
+ zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri,
+@@ -2690,7 +2661,7 @@ out_taskq:
+ taskq_destroy(zvol_taskq);
+ out:
+ ida_destroy(&zvol_ida);
+- mutex_destroy(&zvol_state_lock);
++ rw_destroy(&zvol_state_lock);
+ list_destroy(&zvol_state_list);
+
+ return (SET_ERROR(error));
+@@ -2707,7 +2678,7 @@ zvol_fini(void)
+
+ taskq_destroy(zvol_taskq);
+ list_destroy(&zvol_state_list);
+- mutex_destroy(&zvol_state_lock);
++ rw_destroy(&zvol_state_lock);
+
+ ida_destroy(&zvol_ida);
+ }
diff --git a/zfs-patches/0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch b/zfs-patches/0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch
new file mode 100644
index 0000000..f6498c9
--- /dev/null
+++ b/zfs-patches/0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch
@@ -0,0 +1,368 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Prakash Surya <prakash.surya at delphix.com>
+Date: Mon, 8 Jan 2018 13:45:53 -0800
+Subject: [PATCH] OpenZFS 8997 - ztest assertion failure in zil_lwb_write_issue
+
+PROBLEM
+=======
+
+When `dmu_tx_assign` is called from `zil_lwb_write_issue`, it's possible
+for either `ERESTART` or `EIO` to be returned.
+
+If `ERESTART` is returned, this will cause an assertion to fail directly
+in `zil_lwb_write_issue`, where the code assumes the return value is
+`EIO` if `dmu_tx_assign` returns a non-zero value. This can occur if the
+SPA is suspended when `dmu_tx_assign` is called, and most often occurs
+when running `zloop`.
+
+If `EIO` is returned, this can cause assertions to fail elsewhere in the
+ZIL code. For example, `zil_commit_waiter_timeout` contains the
+following logic:
+
+ lwb_t *nlwb = zil_lwb_write_issue(zilog, lwb);
+ ASSERT3S(lwb->lwb_state, !=, LWB_STATE_OPENED);
+
+In this case, if `dmu_tx_assign` returned `EIO` from within
+`zil_lwb_write_issue`, the `lwb` variable passed in will not be issued
+to disk. Thus, its `lwb_state` field will remain `LWB_STATE_OPENED` and
+this assertion will fail. `zil_commit_waiter_timeout` assumes that after
+it calls `zil_lwb_write_issue`, the `lwb` will be issued to disk, and
+doesn't handle the case where this is not true; i.e. it doesn't handle
+the case where `dmu_tx_assign` returns `EIO`.
+
+SOLUTION
+========
+
+This change modifies the `dmu_tx_assign` function such that `txg_how` is
+a bitmask, rather than of the `txg_how_t` enum type. Now, the previous
+`TXG_WAITED` semantics can be used via `TXG_NOTHROTTLE`, along with
+specifying either `TXG_NOWAIT` or `TXG_WAIT` semantics.
+
+Previously, when `TXG_WAITED` was specified, `TXG_NOWAIT` semantics was
+automatically invoked. This was not ideal when using `TXG_WAITED` within
+`zil_lwb_write_issue`, leading to the problem described above. Rather, we
+want to achieve the semantics of `TXG_WAIT`, while also preventing the
+`tx` from being penalized via the dirty delay throttling.
+
+With this change, `zil_lwb_write_issue` can achieve the semantics that
+it requires by passing in the value `TXG_WAIT | TXG_NOTHROTTLE` to
+`dmu_tx_assign`.
+
+Further, consumers of `dmu_tx_assign` wishing to achieve the old
+`TXG_WAITED` semantics can pass in the value `TXG_NOWAIT | TXG_NOTHROTTLE`.
+
+Authored by: Prakash Surya <prakash.surya at delphix.com>
+Approved by: Robert Mustacchi <rm at joyent.com>
+Reviewed by: Matt Ahrens <mahrens at delphix.com>
+Reviewed by: Andriy Gapon <avg at FreeBSD.org>
+Ported-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+
+Porting Notes:
+- Additionally updated `zfs_tmpfile` to use `TXG_NOTHROTTLE`
+
+OpenZFS-issue: https://www.illumos.org/issues/8997
+OpenZFS-commit: https://github.com/openzfs/openzfs/commit/19ea6cb0f9
+Closes #7084
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ include/sys/dmu.h | 15 +++++++------
+ include/sys/dmu_tx.h | 8 +++----
+ module/zfs/dmu_tx.c | 57 ++++++++++++++++++++++++++------------------------
+ module/zfs/zfs_vnops.c | 21 ++++++++++---------
+ module/zfs/zil.c | 10 ++++++++-
+ 5 files changed, 63 insertions(+), 48 deletions(-)
+
+diff --git a/include/sys/dmu.h b/include/sys/dmu.h
+index 755a9056..5b355afb 100644
+--- a/include/sys/dmu.h
++++ b/include/sys/dmu.h
+@@ -227,11 +227,14 @@ typedef enum dmu_object_type {
+ DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
+ } dmu_object_type_t;
+
+-typedef enum txg_how {
+- TXG_WAIT = 1,
+- TXG_NOWAIT,
+- TXG_WAITED,
+-} txg_how_t;
++/*
++ * These flags are intended to be used to specify the "txg_how"
++ * parameter when calling the dmu_tx_assign() function. See the comment
++ * above dmu_tx_assign() for more details on the meaning of these flags.
++ */
++#define TXG_NOWAIT (0ULL)
++#define TXG_WAIT (1ULL<<0)
++#define TXG_NOTHROTTLE (1ULL<<1)
+
+ void byteswap_uint64_array(void *buf, size_t size);
+ void byteswap_uint32_array(void *buf, size_t size);
+@@ -694,7 +697,7 @@ void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
+ void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
+ void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
+ void dmu_tx_abort(dmu_tx_t *tx);
+-int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
++int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+ void dmu_tx_wait(dmu_tx_t *tx);
+ void dmu_tx_commit(dmu_tx_t *tx);
+ void dmu_tx_mark_netfree(dmu_tx_t *tx);
+diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
+index d82a7931..74b7e111 100644
+--- a/include/sys/dmu_tx.h
++++ b/include/sys/dmu_tx.h
+@@ -67,9 +67,6 @@ struct dmu_tx {
+ /* placeholder for syncing context, doesn't need specific holds */
+ boolean_t tx_anyobj;
+
+- /* has this transaction already been delayed? */
+- boolean_t tx_waited;
+-
+ /* transaction is marked as being a "net free" of space */
+ boolean_t tx_netfree;
+
+@@ -79,6 +76,9 @@ struct dmu_tx {
+ /* need to wait for sufficient dirty space */
+ boolean_t tx_wait_dirty;
+
++ /* has this transaction already been delayed? */
++ boolean_t tx_dirty_delayed;
++
+ int tx_err;
+ };
+
+@@ -138,7 +138,7 @@ extern dmu_tx_stats_t dmu_tx_stats;
+ * These routines are defined in dmu.h, and are called by the user.
+ */
+ dmu_tx_t *dmu_tx_create(objset_t *dd);
+-int dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how);
++int dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how);
+ void dmu_tx_commit(dmu_tx_t *tx);
+ void dmu_tx_abort(dmu_tx_t *tx);
+ uint64_t dmu_tx_get_txg(dmu_tx_t *tx);
+diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
+index c3cc03a6..6ebff267 100644
+--- a/module/zfs/dmu_tx.c
++++ b/module/zfs/dmu_tx.c
+@@ -854,7 +854,7 @@ dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty)
+ * decreasing performance.
+ */
+ static int
+-dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
++dmu_tx_try_assign(dmu_tx_t *tx, uint64_t txg_how)
+ {
+ spa_t *spa = tx->tx_pool->dp_spa;
+
+@@ -878,13 +878,13 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
+ * of the failuremode setting.
+ */
+ if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE &&
+- txg_how != TXG_WAIT)
++ !(txg_how & TXG_WAIT))
+ return (SET_ERROR(EIO));
+
+ return (SET_ERROR(ERESTART));
+ }
+
+- if (!tx->tx_waited &&
++ if (!tx->tx_dirty_delayed &&
+ dsl_pool_need_dirty_delay(tx->tx_pool)) {
+ tx->tx_wait_dirty = B_TRUE;
+ DMU_TX_STAT_BUMP(dmu_tx_dirty_delay);
+@@ -976,41 +976,44 @@ dmu_tx_unassign(dmu_tx_t *tx)
+ }
+
+ /*
+- * Assign tx to a transaction group. txg_how can be one of:
++ * Assign tx to a transaction group; txg_how is a bitmask:
+ *
+- * (1) TXG_WAIT. If the current open txg is full, waits until there's
+- * a new one. This should be used when you're not holding locks.
+- * It will only fail if we're truly out of space (or over quota).
++ * If TXG_WAIT is set and the currently open txg is full, this function
++ * will wait until there's a new txg. This should be used when no locks
++ * are being held. With this bit set, this function will only fail if
++ * we're truly out of space (or over quota).
+ *
+- * (2) TXG_NOWAIT. If we can't assign into the current open txg without
+- * blocking, returns immediately with ERESTART. This should be used
+- * whenever you're holding locks. On an ERESTART error, the caller
+- * should drop locks, do a dmu_tx_wait(tx), and try again.
++ * If TXG_WAIT is *not* set and we can't assign into the currently open
++ * txg without blocking, this function will return immediately with
++ * ERESTART. This should be used whenever locks are being held. On an
++ * ERESTART error, the caller should drop all locks, call dmu_tx_wait(),
++ * and try again.
+ *
+- * (3) TXG_WAITED. Like TXG_NOWAIT, but indicates that dmu_tx_wait()
+- * has already been called on behalf of this operation (though
+- * most likely on a different tx).
++ * If TXG_NOTHROTTLE is set, this indicates that this tx should not be
++ * delayed due to the ZFS Write Throttle (see comments in dsl_pool.c for
++ * details on the throttle). This is used by the VFS operations, after
++ * they have already called dmu_tx_wait() (though most likely on a
++ * different tx).
+ */
+ int
+-dmu_tx_assign(dmu_tx_t *tx, txg_how_t txg_how)
++dmu_tx_assign(dmu_tx_t *tx, uint64_t txg_how)
+ {
+ int err;
+
+ ASSERT(tx->tx_txg == 0);
+- ASSERT(txg_how == TXG_WAIT || txg_how == TXG_NOWAIT ||
+- txg_how == TXG_WAITED);
++ ASSERT0(txg_how & ~(TXG_WAIT | TXG_NOTHROTTLE));
+ ASSERT(!dsl_pool_sync_context(tx->tx_pool));
+
+- if (txg_how == TXG_WAITED)
+- tx->tx_waited = B_TRUE;
+-
+ /* If we might wait, we must not hold the config lock. */
+- ASSERT(txg_how != TXG_WAIT || !dsl_pool_config_held(tx->tx_pool));
++ IMPLY((txg_how & TXG_WAIT), !dsl_pool_config_held(tx->tx_pool));
++
++ if ((txg_how & TXG_NOTHROTTLE))
++ tx->tx_dirty_delayed = B_TRUE;
+
+ while ((err = dmu_tx_try_assign(tx, txg_how)) != 0) {
+ dmu_tx_unassign(tx);
+
+- if (err != ERESTART || txg_how != TXG_WAIT)
++ if (err != ERESTART || !(txg_how & TXG_WAIT))
+ return (err);
+
+ dmu_tx_wait(tx);
+@@ -1054,12 +1057,12 @@ dmu_tx_wait(dmu_tx_t *tx)
+ tx->tx_wait_dirty = B_FALSE;
+
+ /*
+- * Note: setting tx_waited only has effect if the caller
+- * used TX_WAIT. Otherwise they are going to destroy
+- * this tx and try again. The common case, zfs_write(),
+- * uses TX_WAIT.
++ * Note: setting tx_dirty_delayed only has effect if the
++ * caller used TXG_WAIT. Otherwise they are going to
++ * destroy this tx and try again. The common case,
++ * zfs_write(), uses TXG_WAIT.
+ */
+- tx->tx_waited = B_TRUE;
++ tx->tx_dirty_delayed = B_TRUE;
+ } else if (spa_suspended(spa) || tx->tx_lasttried_txg == 0) {
+ /*
+ * If the pool is suspended we need to wait until it
+diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
+index 34ea751c..4805f897 100644
+--- a/module/zfs/zfs_vnops.c
++++ b/module/zfs/zfs_vnops.c
+@@ -129,7 +129,7 @@
+ *
+ * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
+ * then drop all locks, call dmu_tx_wait(), and try again. On subsequent
+- * calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
++ * calls to dmu_tx_assign(), pass TXG_NOTHROTTLE in addition to TXG_NOWAIT,
+ * to indicate that this operation has already called dmu_tx_wait().
+ * This will ensure that we don't retry forever, waiting a short bit
+ * each time.
+@@ -154,7 +154,7 @@
+ * rw_enter(...); // grab any other locks you need
+ * tx = dmu_tx_create(...); // get DMU tx
+ * dmu_tx_hold_*(); // hold each object you might modify
+- * error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ * error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ * if (error) {
+ * rw_exit(...); // drop locks
+ * zfs_dirent_unlock(dl); // unlock directory entry
+@@ -1427,7 +1427,8 @@ top:
+ dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+ 0, acl_ids.z_aclp->z_acl_bytes);
+ }
+- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ error = dmu_tx_assign(tx,
++ (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+ zfs_dirent_unlock(dl);
+ if (error == ERESTART) {
+@@ -1602,7 +1603,7 @@ top:
+ dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+ 0, acl_ids.z_aclp->z_acl_bytes);
+ }
+- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+ if (error == ERESTART) {
+ waited = B_TRUE;
+@@ -1775,7 +1776,7 @@ top:
+ */
+ dmu_tx_mark_netfree(tx);
+
+- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+ zfs_dirent_unlock(dl);
+ if (error == ERESTART) {
+@@ -2017,7 +2018,7 @@ top:
+ dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+ ZFS_SA_BASE_ATTR_SIZE);
+
+- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+ zfs_dirent_unlock(dl);
+ if (error == ERESTART) {
+@@ -2156,7 +2157,7 @@ top:
+ zfs_sa_upgrade_txholds(tx, zp);
+ zfs_sa_upgrade_txholds(tx, dzp);
+ dmu_tx_mark_netfree(tx);
+- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+ rw_exit(&zp->z_parent_lock);
+ rw_exit(&zp->z_name_lock);
+@@ -3623,7 +3624,7 @@ top:
+
+ zfs_sa_upgrade_txholds(tx, szp);
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+ if (zl != NULL)
+ zfs_rename_unlock(&zl);
+@@ -3815,7 +3816,7 @@ top:
+ }
+ if (fuid_dirtied)
+ zfs_fuid_txhold(zfsvfs, tx);
+- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+ zfs_dirent_unlock(dl);
+ if (error == ERESTART) {
+@@ -4041,7 +4042,7 @@ top:
+
+ zfs_sa_upgrade_txholds(tx, szp);
+ zfs_sa_upgrade_txholds(tx, dzp);
+- error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
++ error = dmu_tx_assign(tx, (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+ zfs_dirent_unlock(dl);
+ if (error == ERESTART) {
+diff --git a/module/zfs/zil.c b/module/zfs/zil.c
+index 645b1d4d..a2bbdcb9 100644
+--- a/module/zfs/zil.c
++++ b/module/zfs/zil.c
+@@ -1009,7 +1009,15 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
+ * to clean up in the event of allocation failure or I/O failure.
+ */
+ tx = dmu_tx_create(zilog->zl_os);
+- VERIFY(dmu_tx_assign(tx, TXG_WAIT) == 0);
++
++ /*
++ * Since we are not going to create any new dirty data, and we
++ * can even help with clearing the existing dirty data, we
++ * should not be subject to the dirty data based delays. We
++ * use TXG_NOTHROTTLE to bypass the delay mechanism.
++ */
++ VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
++
+ dsl_dataset_dirty(dmu_objset_ds(zilog->zl_os), tx);
+ txg = dmu_tx_get_txg(tx);
+
diff --git a/zfs-patches/0009-Fix-divide-by-zero-in-mmp_delay_update.patch b/zfs-patches/0009-Fix-divide-by-zero-in-mmp_delay_update.patch
new file mode 100644
index 0000000..d39118b
--- /dev/null
+++ b/zfs-patches/0009-Fix-divide-by-zero-in-mmp_delay_update.patch
@@ -0,0 +1,34 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Olaf Faaland <faaland1 at llnl.gov>
+Date: Fri, 6 Apr 2018 13:29:11 -0700
+Subject: [PATCH] Fix divide-by-zero in mmp_delay_update()
+
+vdev_count_leaves() in the denominator may return 0, caught by Coverity.
+Introduced by
+
+* 533ea04 Update mmp_delay on sync or skipped, failed write
+
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Reviewed-by: Giuseppe Di Natale <dinatale2 at llnl.gov>
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Signed-off-by: Olaf Faaland <faaland1 at llnl.gov>
+Closes #7391
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/mmp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
+index 1ae5f31f..3b74a6b6 100644
+--- a/module/zfs/mmp.c
++++ b/module/zfs/mmp.c
+@@ -327,7 +327,7 @@ mmp_delay_update(spa_t *spa, boolean_t write_completed)
+ */
+ if (delay < mts->mmp_delay) {
+ hrtime_t min_delay = MSEC2NSEC(zfs_multihost_interval) /
+- vdev_count_leaves(spa);
++ MAX(1, vdev_count_leaves(spa));
+ mts->mmp_delay = MAX(((delay + mts->mmp_delay * 127) / 128),
+ min_delay);
+ }
diff --git a/zfs-patches/0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch b/zfs-patches/0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch
new file mode 100644
index 0000000..910f4b8
--- /dev/null
+++ b/zfs-patches/0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch
@@ -0,0 +1,867 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Chunwei Chen <tuxoko at gmail.com>
+Date: Wed, 18 Apr 2018 14:19:50 -0700
+Subject: [PATCH] Fix ENOSPC in "Handle zap_add() failures in ..."
+
+Commit cc63068 caused ENOSPC errors when copying a large number of files
+between two directories. The reason is that the patch limits zap leaf
+expansion to 2 retries, and returns ENOSPC when they fail.
+
+The intent for limiting retries is to prevent pointlessly growing table
+to max size when adding a block full of entries with same name in
+different case in mixed mode. However, it turns out we cannot use any
+limit on the retry. When we copy files from one directory in readdir
+order, we are copying in hash order, one leaf block at a time. Which
+means that if the leaf block in source directory has expanded 6 times,
+and you copy those entries in that block, by the time you need to expand
+the leaf in destination directory, you need to expand it 6 times in one
+go. So any limit on the retry will result in error where it shouldn't.
+
+Note that while we do use different salt for different directories, it
+seems that the salt/hash function doesn't provide enough randomization
+to the hash distance to prevent this from happening.
+
+Since cc63068 has already been reverted, this patch adds it back and
+removes the retry limit.
+
+Also, as it turns out, failing on zap_add() has a serious side effect for
+mzap_upgrade(). When upgrading from micro zap to fat zap, it will
+call zap_add() to transfer entries one at a time. If it hits any error
+halfway through, the remaining entries will be lost, causing those files
+to become orphans. This patch adds a VERIFY to catch it.
+
+Reviewed-by: Sanjeev Bagewadi <sanjeev.bagewadi at gmail.com>
+Reviewed-by: Richard Yao <ryao at gentoo.org>
+Reviewed-by: Tony Hutter <hutter2 at llnl.gov>
+Reviewed-by: Albert Lee <trisk at forkgnu.org>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Reviewed-by: Matthew Ahrens <mahrens at delphix.com>
+Signed-off-by: Chunwei Chen <david.chen at nutanix.com>
+Closes #7401
+Closes #7421
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ configure.ac | 1 +
+ include/sys/zap_leaf.h | 15 ++-
+ module/zfs/zap.c | 10 +-
+ module/zfs/zap_leaf.c | 2 +-
+ module/zfs/zap_micro.c | 47 ++++++-
+ module/zfs/zfs_dir.c | 29 ++++-
+ module/zfs/zfs_vnops.c | 74 ++++++++---
+ tests/runfiles/linux.run | 6 +-
+ tests/zfs-tests/tests/functional/Makefile.am | 1 +
+ .../tests/functional/casenorm/Makefile.am | 1 +
+ .../functional/casenorm/mixed_create_failure.ksh | 136 +++++++++++++++++++++
+ .../zfs-tests/tests/functional/cp_files/.gitignore | 1 +
+ .../tests/functional/cp_files/Makefile.am | 13 ++
+ .../tests/functional/cp_files/cleanup.ksh | 34 ++++++
+ .../zfs-tests/tests/functional/cp_files/cp_files.c | 58 +++++++++
+ .../tests/functional/cp_files/cp_files_001_pos.ksh | 74 +++++++++++
+ .../zfs-tests/tests/functional/cp_files/setup.ksh | 35 ++++++
+ 17 files changed, 500 insertions(+), 37 deletions(-)
+ create mode 100755 tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
+ create mode 100644 tests/zfs-tests/tests/functional/cp_files/.gitignore
+ create mode 100644 tests/zfs-tests/tests/functional/cp_files/Makefile.am
+ create mode 100755 tests/zfs-tests/tests/functional/cp_files/cleanup.ksh
+ create mode 100644 tests/zfs-tests/tests/functional/cp_files/cp_files.c
+ create mode 100755 tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh
+ create mode 100755 tests/zfs-tests/tests/functional/cp_files/setup.ksh
+
+diff --git a/configure.ac b/configure.ac
+index d9441a0f..3f4925c3 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -238,6 +238,7 @@ AC_CONFIG_FILES([
+ tests/zfs-tests/tests/functional/cli_user/zpool_iostat/Makefile
+ tests/zfs-tests/tests/functional/cli_user/zpool_list/Makefile
+ tests/zfs-tests/tests/functional/compression/Makefile
++ tests/zfs-tests/tests/functional/cp_files/Makefile
+ tests/zfs-tests/tests/functional/ctime/Makefile
+ tests/zfs-tests/tests/functional/delegate/Makefile
+ tests/zfs-tests/tests/functional/devices/Makefile
+diff --git a/include/sys/zap_leaf.h b/include/sys/zap_leaf.h
+index e784c596..a3da1036 100644
+--- a/include/sys/zap_leaf.h
++++ b/include/sys/zap_leaf.h
+@@ -46,10 +46,15 @@ struct zap_stats;
+ * block size (1<<l->l_bs) - hash entry size (2) * number of hash
+ * entries - header space (2*chunksize)
+ */
+-#define ZAP_LEAF_NUMCHUNKS(l) \
+- (((1<<(l)->l_bs) - 2*ZAP_LEAF_HASH_NUMENTRIES(l)) / \
++#define ZAP_LEAF_NUMCHUNKS_BS(bs) \
++ (((1<<(bs)) - 2*ZAP_LEAF_HASH_NUMENTRIES_BS(bs)) / \
+ ZAP_LEAF_CHUNKSIZE - 2)
+
++#define ZAP_LEAF_NUMCHUNKS(l) (ZAP_LEAF_NUMCHUNKS_BS(((l)->l_bs)))
++
++#define ZAP_LEAF_NUMCHUNKS_DEF \
++ (ZAP_LEAF_NUMCHUNKS_BS(fzap_default_block_shift))
++
+ /*
+ * The amount of space within the chunk available for the array is:
+ * chunk size - space for type (1) - space for next pointer (2)
+@@ -74,8 +79,10 @@ struct zap_stats;
+ * which is less than block size / CHUNKSIZE (24) / minimum number of
+ * chunks per entry (3).
+ */
+-#define ZAP_LEAF_HASH_SHIFT(l) ((l)->l_bs - 5)
+-#define ZAP_LEAF_HASH_NUMENTRIES(l) (1 << ZAP_LEAF_HASH_SHIFT(l))
++#define ZAP_LEAF_HASH_SHIFT_BS(bs) ((bs) - 5)
++#define ZAP_LEAF_HASH_NUMENTRIES_BS(bs) (1 << ZAP_LEAF_HASH_SHIFT_BS(bs))
++#define ZAP_LEAF_HASH_SHIFT(l) (ZAP_LEAF_HASH_SHIFT_BS(((l)->l_bs)))
++#define ZAP_LEAF_HASH_NUMENTRIES(l) (ZAP_LEAF_HASH_NUMENTRIES_BS(((l)->l_bs)))
+
+ /*
+ * The chunks start immediately after the hash table. The end of the
+diff --git a/module/zfs/zap.c b/module/zfs/zap.c
+index ee9962bf..47b4c1ab 100644
+--- a/module/zfs/zap.c
++++ b/module/zfs/zap.c
+@@ -853,8 +853,16 @@ retry:
+ } else if (err == EAGAIN) {
+ err = zap_expand_leaf(zn, l, tag, tx, &l);
+ zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
+- if (err == 0)
++ if (err == 0) {
+ goto retry;
++ } else if (err == ENOSPC) {
++ /*
++ * If we failed to expand the leaf, then bailout
++ * as there is no point trying
++ * zap_put_leaf_maybe_grow_ptrtbl().
++ */
++ return (err);
++ }
+ }
+
+ out:
+diff --git a/module/zfs/zap_leaf.c b/module/zfs/zap_leaf.c
+index c342695c..526e4660 100644
+--- a/module/zfs/zap_leaf.c
++++ b/module/zfs/zap_leaf.c
+@@ -53,7 +53,7 @@ static uint16_t *zap_leaf_rehash_entry(zap_leaf_t *l, uint16_t entry);
+ ((h) >> \
+ (64 - ZAP_LEAF_HASH_SHIFT(l) - zap_leaf_phys(l)->l_hdr.lh_prefix_len)))
+
+-#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
++#define LEAF_HASH_ENTPTR(l, h) (&zap_leaf_phys(l)->l_hash[LEAF_HASH(l, h)])
+
+ extern inline zap_leaf_phys_t *zap_leaf_phys(zap_leaf_t *l);
+
+diff --git a/module/zfs/zap_micro.c b/module/zfs/zap_micro.c
+index 3ebf995c..60e193ef 100644
+--- a/module/zfs/zap_micro.c
++++ b/module/zfs/zap_micro.c
+@@ -363,6 +363,41 @@ mze_find_unused_cd(zap_t *zap, uint64_t hash)
+ return (cd);
+ }
+
++/*
++ * Each mzap entry requires at max : 4 chunks
++ * 3 chunks for names + 1 chunk for value.
++ */
++#define MZAP_ENT_CHUNKS (1 + ZAP_LEAF_ARRAY_NCHUNKS(MZAP_NAME_LEN) + \
++ ZAP_LEAF_ARRAY_NCHUNKS(sizeof (uint64_t)))
++
++/*
++ * Check if the current entry keeps the colliding entries under the fatzap leaf
++ * size.
++ */
++static boolean_t
++mze_canfit_fzap_leaf(zap_name_t *zn, uint64_t hash)
++{
++ zap_t *zap = zn->zn_zap;
++ mzap_ent_t mze_tofind;
++ mzap_ent_t *mze;
++ avl_index_t idx;
++ avl_tree_t *avl = &zap->zap_m.zap_avl;
++ uint32_t mzap_ents = 0;
++
++ mze_tofind.mze_hash = hash;
++ mze_tofind.mze_cd = 0;
++
++ for (mze = avl_find(avl, &mze_tofind, &idx);
++ mze && mze->mze_hash == hash; mze = AVL_NEXT(avl, mze)) {
++ mzap_ents++;
++ }
++
++ /* Include the new entry being added */
++ mzap_ents++;
++
++ return (ZAP_LEAF_NUMCHUNKS_DEF > (mzap_ents * MZAP_ENT_CHUNKS));
++}
++
+ static void
+ mze_remove(zap_t *zap, mzap_ent_t *mze)
+ {
+@@ -639,16 +674,15 @@ mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
+ dprintf("adding %s=%llu\n",
+ mze->mze_name, mze->mze_value);
+ zn = zap_name_alloc(zap, mze->mze_name, 0);
+- err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
+- tag, tx);
++ /* If we fail here, we would end up losing entries */
++ VERIFY0(fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
++ tag, tx));
+ zap = zn->zn_zap; /* fzap_add_cd() may change zap */
+ zap_name_free(zn);
+- if (err)
+- break;
+ }
+ vmem_free(mzp, sz);
+ *zapp = zap;
+- return (err);
++ return (0);
+ }
+
+ /*
+@@ -1191,7 +1225,8 @@ zap_add_impl(zap_t *zap, const char *key,
+ err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
+ zap = zn->zn_zap; /* fzap_add() may change zap */
+ } else if (integer_size != 8 || num_integers != 1 ||
+- strlen(key) >= MZAP_NAME_LEN) {
++ strlen(key) >= MZAP_NAME_LEN ||
++ !mze_canfit_fzap_leaf(zn, zn->zn_hash)) {
+ err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
+ if (err == 0) {
+ err = fzap_add(zn, integer_size, num_integers, val,
+diff --git a/module/zfs/zfs_dir.c b/module/zfs/zfs_dir.c
+index 9a8bbccd..6398a1d1 100644
+--- a/module/zfs/zfs_dir.c
++++ b/module/zfs/zfs_dir.c
+@@ -742,7 +742,11 @@ zfs_dirent(znode_t *zp, uint64_t mode)
+ }
+
+ /*
+- * Link zp into dl. Can only fail if zp has been unlinked.
++ * Link zp into dl. Can fail in the following cases :
++ * - if zp has been unlinked.
++ * - if the number of entries with the same hash (aka. colliding entries)
++ * exceed the capacity of a leaf-block of fatzap and splitting of the
++ * leaf-block does not help.
+ */
+ int
+ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
+@@ -776,6 +780,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
+ NULL, &links, sizeof (links));
+ }
+ }
++
++ value = zfs_dirent(zp, zp->z_mode);
++ error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name, 8, 1,
++ &value, tx);
++
++ /*
++ * zap_add could fail to add the entry if it exceeds the capacity of the
++ * leaf-block and zap_leaf_split() failed to help.
++ * The caller of this routine is responsible for failing the transaction
++ * which will rollback the SA updates done above.
++ */
++ if (error != 0) {
++ if (!(flag & ZRENAMING) && !(flag & ZNEW))
++ drop_nlink(ZTOI(zp));
++ mutex_exit(&zp->z_lock);
++ return (error);
++ }
++
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
+ &dzp->z_id, sizeof (dzp->z_id));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
+@@ -813,11 +835,6 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
+ ASSERT(error == 0);
+ mutex_exit(&dzp->z_lock);
+
+- value = zfs_dirent(zp, zp->z_mode);
+- error = zap_add(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
+- 8, 1, &value, tx);
+- ASSERT(error == 0);
+-
+ return (0);
+ }
+
+diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
+index 4805f897..5a2e55eb 100644
+--- a/module/zfs/zfs_vnops.c
++++ b/module/zfs/zfs_vnops.c
+@@ -1427,6 +1427,7 @@ top:
+ dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+ 0, acl_ids.z_aclp->z_acl_bytes);
+ }
++
+ error = dmu_tx_assign(tx,
+ (waited ? TXG_NOTHROTTLE : 0) | TXG_NOWAIT);
+ if (error) {
+@@ -1444,10 +1445,22 @@ top:
+ }
+ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
++ error = zfs_link_create(dl, zp, tx, ZNEW);
++ if (error != 0) {
++ /*
++ * Since, we failed to add the directory entry for it,
++ * delete the newly created dnode.
++ */
++ zfs_znode_delete(zp, tx);
++ remove_inode_hash(ZTOI(zp));
++ zfs_acl_ids_free(&acl_ids);
++ dmu_tx_commit(tx);
++ goto out;
++ }
++
+ if (fuid_dirtied)
+ zfs_fuid_sync(zfsvfs, tx);
+
+- (void) zfs_link_create(dl, zp, tx, ZNEW);
+ txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
+ if (flag & FIGNORECASE)
+ txtype |= TX_CI;
+@@ -2038,13 +2051,18 @@ top:
+ */
+ zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
+
+- if (fuid_dirtied)
+- zfs_fuid_sync(zfsvfs, tx);
+-
+ /*
+ * Now put new name in parent dir.
+ */
+- (void) zfs_link_create(dl, zp, tx, ZNEW);
++ error = zfs_link_create(dl, zp, tx, ZNEW);
++ if (error != 0) {
++ zfs_znode_delete(zp, tx);
++ remove_inode_hash(ZTOI(zp));
++ goto out;
++ }
++
++ if (fuid_dirtied)
++ zfs_fuid_sync(zfsvfs, tx);
+
+ *ipp = ZTOI(zp);
+
+@@ -2054,6 +2072,7 @@ top:
+ zfs_log_create(zilog, tx, txtype, dzp, zp, dirname, vsecp,
+ acl_ids.z_fuidp, vap);
+
++out:
+ zfs_acl_ids_free(&acl_ids);
+
+ dmu_tx_commit(tx);
+@@ -2063,10 +2082,14 @@ top:
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ zil_commit(zilog, 0);
+
+- zfs_inode_update(dzp);
+- zfs_inode_update(zp);
++ if (error != 0) {
++ iput(ZTOI(zp));
++ } else {
++ zfs_inode_update(dzp);
++ zfs_inode_update(zp);
++ }
+ ZFS_EXIT(zfsvfs);
+- return (0);
++ return (error);
+ }
+
+ /*
+@@ -3684,6 +3707,13 @@ top:
+ VERIFY3U(zfs_link_destroy(tdl, szp, tx,
+ ZRENAMING, NULL), ==, 0);
+ }
++ } else {
++ /*
++ * If we had removed the existing target, the subsequent
++ * call to zfs_link_create() to add back the same entry,
++ * but with the new dnode (szp), should not fail.
++ */
++ ASSERT(tzp == NULL);
+ }
+ }
+
+@@ -3854,14 +3884,18 @@ top:
+ /*
+ * Insert the new object into the directory.
+ */
+- (void) zfs_link_create(dl, zp, tx, ZNEW);
+-
+- if (flags & FIGNORECASE)
+- txtype |= TX_CI;
+- zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
++ error = zfs_link_create(dl, zp, tx, ZNEW);
++ if (error != 0) {
++ zfs_znode_delete(zp, tx);
++ remove_inode_hash(ZTOI(zp));
++ } else {
++ if (flags & FIGNORECASE)
++ txtype |= TX_CI;
++ zfs_log_symlink(zilog, tx, txtype, dzp, zp, name, link);
+
+- zfs_inode_update(dzp);
+- zfs_inode_update(zp);
++ zfs_inode_update(dzp);
++ zfs_inode_update(zp);
++ }
+
+ zfs_acl_ids_free(&acl_ids);
+
+@@ -3869,10 +3903,14 @@ top:
+
+ zfs_dirent_unlock(dl);
+
+- *ipp = ZTOI(zp);
++ if (error == 0) {
++ *ipp = ZTOI(zp);
+
+- if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
+- zil_commit(zilog, 0);
++ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
++ zil_commit(zilog, 0);
++ } else {
++ iput(ZTOI(zp));
++ }
+
+ ZFS_EXIT(zfsvfs);
+ return (error);
+diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
+index 272c8c77..379c9f73 100644
+--- a/tests/runfiles/linux.run
++++ b/tests/runfiles/linux.run
+@@ -55,7 +55,7 @@ tags = ['functional', 'cachefile']
+ # 'mixed_none_lookup', 'mixed_none_lookup_ci', 'mixed_none_delete',
+ # 'mixed_formd_lookup', 'mixed_formd_lookup_ci', 'mixed_formd_delete']
+ [tests/functional/casenorm]
+-tests = ['case_all_values', 'norm_all_values']
++tests = ['case_all_values', 'norm_all_values', 'mixed_create_failure']
+ tags = ['functional', 'casenorm']
+
+ [tests/functional/chattr]
+@@ -394,6 +394,10 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos',
+ 'compress_004_pos']
+ tags = ['functional', 'compression']
+
++[tests/functional/cp_files]
++tests = ['cp_files_001_pos']
++tags = ['functional', 'cp_files']
++
+ [tests/functional/ctime]
+ tests = ['ctime_001_pos' ]
+ tags = ['functional', 'ctime']
+diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am
+index cd60324f..ea52205a 100644
+--- a/tests/zfs-tests/tests/functional/Makefile.am
++++ b/tests/zfs-tests/tests/functional/Makefile.am
+@@ -11,6 +11,7 @@ SUBDIRS = \
+ cli_root \
+ cli_user \
+ compression \
++ cp_files \
+ ctime \
+ delegate \
+ devices \
+diff --git a/tests/zfs-tests/tests/functional/casenorm/Makefile.am b/tests/zfs-tests/tests/functional/casenorm/Makefile.am
+index 65dd156e..b284a256 100644
+--- a/tests/zfs-tests/tests/functional/casenorm/Makefile.am
++++ b/tests/zfs-tests/tests/functional/casenorm/Makefile.am
+@@ -7,6 +7,7 @@ dist_pkgdata_SCRIPTS = \
+ insensitive_formd_lookup.ksh \
+ insensitive_none_delete.ksh \
+ insensitive_none_lookup.ksh \
++ mixed_create_failure.ksh \
+ mixed_formd_delete.ksh \
+ mixed_formd_lookup_ci.ksh \
+ mixed_formd_lookup.ksh \
+diff --git a/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh b/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
+new file mode 100755
+index 00000000..51b5bb3f
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/casenorm/mixed_create_failure.ksh
+@@ -0,0 +1,136 @@
++#!/bin/ksh -p
++#
++#
++# This file and its contents are supplied under the terms of the
++# Common Development and Distribution License ("CDDL"), version 1.0.
++# You may only use this file in accordance with the terms of version
++# 1.0 of the CDDL.
++#
++# A full copy of the text of the CDDL should have accompanied this
++# source. A copy of the CDDL is also available via the Internet at
++# http://www.illumos.org/license/CDDL.
++#
++#
++# Copyright 2018 Nutanix Inc. All rights reserved.
++#
++
++. $STF_SUITE/tests/functional/casenorm/casenorm.kshlib
++
++# DESCRIPTION:
++# For the filesystem with casesensitivity=mixed, normalization=none,
++# when multiple files with the same name (differing only in case) are created,
++# the number of files is limited to what can fit in a fatzap leaf-block.
++# And beyond that, it fails with ENOSPC.
++#
++# Ensure that the create/rename operations fail gracefully and do not trigger
++# an ASSERT.
++#
++# STRATEGY:
++# Repeat the below steps for objects: files, directories, symlinks and hardlinks
++# 1. Create objects with same name but varying in case.
++# E.g. 'abcdefghijklmnop', 'Abcdefghijklmnop', 'ABcdefghijklmnop' etc.
++# The create should fail with ENOSPC.
++# 2. Create an object with name 'tmp_obj' and try to rename it to name that we
++# failed to add in step 1 above.
++# This should fail as well.
++
++verify_runnable "global"
++
++function cleanup
++{
++ destroy_testfs
++}
++
++log_onexit cleanup
++log_assert "With mixed mode: ensure create fails with ENOSPC beyond a certain limit"
++
++create_testfs "-o casesensitivity=mixed -o normalization=none"
++
++# Different object types
++obj_type=('file' 'dir' 'symlink' 'hardlink')
++
++# Commands to create different object types
++typeset -A ops
++ops['file']='touch'
++ops['dir']='mkdir'
++ops['symlink']='ln -s'
++ops['hardlink']='ln'
++
++# This function tests the following for a given object type :
++# - Create multiple objects with the same name (varying only in case).
++# Ensure that it eventually fails once the leaf-block limit is exceeded.
++# - Create another object with a different name, and attempt to rename it to
++# the name (for which the create had failed in the previous step).
++# This should fail as well.
++# Args :
++# $1 - object type (file/dir/symlink/hardlink)
++# $2 - test directory
++#
++function test_ops
++{
++ typeset obj_type=$1
++ typeset testdir=$2
++
++ target_obj='target-file'
++
++ op="${ops[$obj_type]}"
++
++ log_note "The op : $op"
++ log_note "testdir=$testdir obj_type=$obj_type"
++
++ test_path="$testdir/$obj_type"
++ mkdir $test_path
++ log_note "Created test dir $test_path"
++
++ if [[ $obj_type = "symlink" || $obj_type = "hardlink" ]]; then
++ touch $test_path/$target_obj
++ log_note "Created target: $test_path/$target_obj"
++ op="$op $test_path/$target_obj"
++ fi
++
++ log_note "op : $op"
++ names='{a,A}{b,B}{c,C}{d,D}{e,E}{f,F}{g,G}{h,H}{i,I}{j,J}{k,K}{l,L}'
++ for name in $names; do
++ cmd="$op $test_path/$name"
++ out=$($cmd 2>&1)
++ ret=$?
++ log_note "cmd: $cmd ret: $ret out=$out"
++ if (($ret != 0)); then
++ if [[ $out = *@(No space left on device)* ]]; then
++ save_name="$test_path/$name"
++ break;
++ else
++ log_err "$cmd failed with unexpected error : $out"
++ fi
++ fi
++ done
++
++ log_note 'Test rename \"sample_name\" rename'
++ TMP_OBJ="$test_path/tmp_obj"
++ cmd="$op $TMP_OBJ"
++ out=$($cmd 2>&1)
++ ret=$?
++ if (($ret != 0)); then
++ log_err "cmd:$cmd failed out:$out"
++ fi
++
++ # Now, try to rename the tmp_obj to the name which we failed to add earlier.
++ # This should fail as well.
++ out=$(mv $TMP_OBJ $save_name 2>&1)
++ ret=$?
++ if (($ret != 0)); then
++ if [[ $out = *@(No space left on device)* ]]; then
++ log_note "$cmd failed as expected : $out"
++ else
++ log_err "$cmd failed with : $out"
++ fi
++ fi
++}
++
++for obj_type in ${obj_type[*]};
++do
++ log_note "Testing create of $obj_type"
++ test_ops $obj_type $TESTDIR
++done
++
++log_pass "Mixed mode FS: Ops on large number of colliding names fail gracefully"
+diff --git a/tests/zfs-tests/tests/functional/cp_files/.gitignore b/tests/zfs-tests/tests/functional/cp_files/.gitignore
+new file mode 100644
+index 00000000..eac05e15
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cp_files/.gitignore
+@@ -0,0 +1 @@
++/cp_files
+diff --git a/tests/zfs-tests/tests/functional/cp_files/Makefile.am b/tests/zfs-tests/tests/functional/cp_files/Makefile.am
+new file mode 100644
+index 00000000..06c31f5f
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cp_files/Makefile.am
+@@ -0,0 +1,13 @@
++include $(top_srcdir)/config/Rules.am
++
++pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cp_files
++
++dist_pkgdata_SCRIPTS = \
++ cp_files_001_pos.ksh \
++ cleanup.ksh \
++ setup.ksh
++
++pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/cp_files
++
++pkgexec_PROGRAMS = cp_files
++cp_files_SOURCES= cp_files.c
+diff --git a/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh b/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh
+new file mode 100755
+index 00000000..3166bd6e
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cp_files/cleanup.ksh
+@@ -0,0 +1,34 @@
++#!/bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or http://www.opensolaris.org/os/licensing.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++#
++
++#
++# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
++# Use is subject to license terms.
++#
++
++#
++# Copyright (c) 2013 by Delphix. All rights reserved.
++#
++
++. $STF_SUITE/include/libtest.shlib
++
++default_cleanup
+diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_files.c b/tests/zfs-tests/tests/functional/cp_files/cp_files.c
+new file mode 100644
+index 00000000..9af64a11
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cp_files/cp_files.c
+@@ -0,0 +1,58 @@
++#include <stdio.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <sys/types.h>
++#include <sys/stat.h>
++#include <fcntl.h>
++#include <dirent.h>
++#include <errno.h>
++#include <string.h>
++
++int
++main(int argc, char *argv[])
++{
++ int tfd;
++ DIR *sdir;
++ struct dirent *dirent;
++
++ if (argc != 3) {
++ fprintf(stderr, "Usage: %s SRC DST\n", argv[0]);
++ exit(1);
++ }
++
++ sdir = opendir(argv[1]);
++ if (sdir == NULL) {
++ fprintf(stderr, "Failed to open %s: %s\n",
++ argv[1], strerror(errno));
++ exit(2);
++ }
++
++ tfd = open(argv[2], O_DIRECTORY);
++ if (tfd < 0) {
++ fprintf(stderr, "Failed to open %s: %s\n",
++ argv[2], strerror(errno));
++ closedir(sdir);
++ exit(3);
++ }
++
++ while ((dirent = readdir(sdir)) != NULL) {
++ if (dirent->d_name[0] == '.' &&
++ (dirent->d_name[1] == '.' || dirent->d_name[1] == '\0'))
++ continue;
++
++ int fd = openat(tfd, dirent->d_name, O_CREAT|O_WRONLY, 0666);
++ if (fd < 0) {
++ fprintf(stderr, "Failed to create %s/%s: %s\n",
++ argv[2], dirent->d_name, strerror(errno));
++ closedir(sdir);
++ close(tfd);
++ exit(4);
++ }
++ close(fd);
++ }
++
++ closedir(sdir);
++ close(tfd);
++
++ return (0);
++}
+diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh b/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh
+new file mode 100755
+index 00000000..3e138cfc
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cp_files/cp_files_001_pos.ksh
+@@ -0,0 +1,74 @@
++#! /bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or http://www.opensolaris.org/os/licensing.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++#
++
++#
++# Copyright (c) 2018 by Nutanix. All rights reserved.
++#
++
++. $STF_SUITE/include/libtest.shlib
++
++#
++# DESCRIPTION:
++# Copying a large number of files between 2 directories
++# within a zfs filesystem works without errors.
++# This makes sure zap upgrading and expanding works.
++#
++# STRATEGY:
++#
++# 1. Create NR_FILES files in directory src
++# 2. Check the number of files is correct
++# 3. Copy files from src to dst in readdir order
++# 4. Check the number of files is correct
++#
++
++verify_runnable "global"
++
++function cleanup
++{
++ rm -rf $TESTDIR/src $TESTDIR/dst
++}
++
++log_assert "Copy a large number of files between 2 directories" \
++ "within a zfs filesystem works without errors"
++
++log_onexit cleanup
++
++NR_FILES=60000
++BATCH=1000
++
++log_must mkdir $TESTDIR/src
++log_must mkdir $TESTDIR/dst
++
++WD=$(pwd)
++cd $TESTDIR/src
++# create NR_FILES in BATCH at a time to prevent overflowing argument buffer
++for i in $(seq $(($NR_FILES/$BATCH))); do touch $(seq $((($i-1)*$BATCH+1)) $(($i*$BATCH))); done
++cd $WD
++
++log_must test $NR_FILES -eq $(ls -U $TESTDIR/src | wc -l)
++
++# copy files from src to dst, use cp_files to make sure we copy in readdir order
++log_must $STF_SUITE/tests/functional/cp_files/cp_files $TESTDIR/src $TESTDIR/dst
++
++log_must test $NR_FILES -eq $(ls -U $TESTDIR/dst | wc -l)
++
++log_pass
+diff --git a/tests/zfs-tests/tests/functional/cp_files/setup.ksh b/tests/zfs-tests/tests/functional/cp_files/setup.ksh
+new file mode 100755
+index 00000000..fc5cec30
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/cp_files/setup.ksh
+@@ -0,0 +1,35 @@
++#!/bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or http://www.opensolaris.org/os/licensing.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++#
++
++#
++# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
++# Use is subject to license terms.
++#
++
++#
++# Copyright (c) 2013 by Delphix. All rights reserved.
++#
++
++. $STF_SUITE/include/libtest.shlib
++
++DISK=${DISKS%% *}
++default_setup $DISK
diff --git a/zfs-patches/0011-Trim-new-line-from-zfs_vdev_scheduler.patch b/zfs-patches/0011-Trim-new-line-from-zfs_vdev_scheduler.patch
new file mode 100644
index 0000000..09b797e
--- /dev/null
+++ b/zfs-patches/0011-Trim-new-line-from-zfs_vdev_scheduler.patch
@@ -0,0 +1,155 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1 at llnl.gov>
+Date: Tue, 5 Sep 2017 13:41:32 -0700
+Subject: [PATCH] Trim new line from zfs_vdev_scheduler
+
+Add a helper function to trim the trailing newline. While we're
+here use this new hook to immediately apply the new scheduler.
+
+Reviewed-by: Giuseppe Di Natale <dinatale2 at llnl.gov>
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Closes #3356
+Closes #6573
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/vdev_disk.c | 71 +++++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 53 insertions(+), 18 deletions(-)
+
+diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
+index 5ae50a31..d6212835 100644
+--- a/module/zfs/vdev_disk.c
++++ b/module/zfs/vdev_disk.c
+@@ -27,13 +27,14 @@
+ */
+
+ #include <sys/zfs_context.h>
+-#include <sys/spa.h>
++#include <sys/spa_impl.h>
+ #include <sys/vdev_disk.h>
+ #include <sys/vdev_impl.h>
+ #include <sys/abd.h>
+ #include <sys/fs/zfs.h>
+ #include <sys/zio.h>
+ #include <sys/sunldi.h>
++#include <linux/mod_compat.h>
+
+ char *zfs_vdev_scheduler = VDEV_SCHEDULER;
+ static void *zfs_vdev_holder = VDEV_HOLDER;
+@@ -113,15 +114,23 @@ vdev_disk_error(zio_t *zio)
+ * physical device. This yields the largest possible requests for
+ * the device with the lowest total overhead.
+ */
+-static int
++static void
+ vdev_elevator_switch(vdev_t *v, char *elevator)
+ {
+ vdev_disk_t *vd = v->vdev_tsd;
+- struct block_device *bdev = vd->vd_bdev;
+- struct request_queue *q = bdev_get_queue(bdev);
+- char *device = bdev->bd_disk->disk_name;
++ struct request_queue *q;
++ char *device;
+ int error;
+
++ for (int c = 0; c < v->vdev_children; c++)
++ vdev_elevator_switch(v->vdev_child[c], elevator);
++
++ if (!v->vdev_ops->vdev_op_leaf || vd->vd_bdev == NULL)
++ return;
++
++ q = bdev_get_queue(vd->vd_bdev);
++ device = vd->vd_bdev->bd_disk->disk_name;
++
+ /*
+ * Skip devices which are not whole disks (partitions).
+ * Device-mapper devices are excepted since they may be whole
+@@ -131,15 +140,15 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
+ * "Skip devices without schedulers" check below will fail.
+ */
+ if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0)
+- return (0);
++ return;
+
+ /* Skip devices without schedulers (loop, ram, dm, etc) */
+ if (!q->elevator || !blk_queue_stackable(q))
+- return (0);
++ return;
+
+ /* Leave existing scheduler when set to "none" */
+ if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4))
+- return (0);
++ return;
+
+ #ifdef HAVE_ELEVATOR_CHANGE
+ error = elevator_change(q, elevator);
+@@ -156,20 +165,16 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
+ " 2>/dev/null; " \
+ "echo %s"
+
+- {
+- char *argv[] = { "/bin/sh", "-c", NULL, NULL };
+- char *envp[] = { NULL };
++ char *argv[] = { "/bin/sh", "-c", NULL, NULL };
++ char *envp[] = { NULL };
+
+- argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator);
+- error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+- strfree(argv[2]);
+- }
++ argv[2] = kmem_asprintf(SET_SCHEDULER_CMD, device, elevator);
++ error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
++ strfree(argv[2]);
+ #endif /* HAVE_ELEVATOR_CHANGE */
+ if (error)
+ printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n",
+ elevator, v->vdev_path, device, error);
+-
+- return (error);
+ }
+
+ /*
+@@ -798,6 +803,35 @@ vdev_disk_rele(vdev_t *vd)
+ /* XXX: Implement me as a vnode rele for the device */
+ }
+
++static int
++param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
++{
++ spa_t *spa = NULL;
++ char *p;
++
++ if (val == NULL)
++ return (SET_ERROR(-EINVAL));
++
++ if ((p = strchr(val, '\n')) != NULL)
++ *p = '\0';
++
++ mutex_enter(&spa_namespace_lock);
++ while ((spa = spa_next(spa)) != NULL) {
++ if (spa_state(spa) != POOL_STATE_ACTIVE ||
++ !spa_writeable(spa) || spa_suspended(spa))
++ continue;
++
++ spa_open_ref(spa, FTAG);
++ mutex_exit(&spa_namespace_lock);
++ vdev_elevator_switch(spa->spa_root_vdev, (char *)val);
++ mutex_enter(&spa_namespace_lock);
++ spa_close(spa, FTAG);
++ }
++ mutex_exit(&spa_namespace_lock);
++
++ return (param_set_charp(val, kp));
++}
++
+ vdev_ops_t vdev_disk_ops = {
+ vdev_disk_open,
+ vdev_disk_close,
+@@ -812,5 +846,6 @@ vdev_ops_t vdev_disk_ops = {
+ B_TRUE /* leaf vdev */
+ };
+
+-module_param(zfs_vdev_scheduler, charp, 0644);
++module_param_call(zfs_vdev_scheduler, param_set_vdev_scheduler,
++ param_get_charp, &zfs_vdev_scheduler, 0644);
+ MODULE_PARM_DESC(zfs_vdev_scheduler, "I/O scheduler");
diff --git a/zfs-patches/0012-module-param-callbacks-check-for-initialized-spa.patch b/zfs-patches/0012-module-param-callbacks-check-for-initialized-spa.patch
new file mode 100644
index 0000000..bfb22f5
--- /dev/null
+++ b/zfs-patches/0012-module-param-callbacks-check-for-initialized-spa.patch
@@ -0,0 +1,84 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Olaf Faaland <faaland1 at llnl.gov>
+Date: Fri, 11 May 2018 12:46:07 -0700
+Subject: [PATCH] module param callbacks check for initialized spa
+
+Callbacks provided for module parameters are executed both
+after the module is loaded, when a user alters it via sysfs, e.g.
+ echo bar > /sys/module/zfs/parameters/foo
+
+as well as when the module is loaded with an argument, e.g.
+ modprobe zfs foo=bar
+
+In the latter case, the init functions likely have not run yet,
+including spa_init() which initializes the namespace lock so it is safe
+to use.
+
+Instead of immediately taking the namespace lock and attempting to
+iterate over initialized spa structures, check whether spa_mode_global
+is nonzero. This is set by spa_init() after it has initialized the
+namespace lock.
+
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Reviewed-by: Tim Chase <tim at chase2k.com>
+Signed-off-by: Olaf Faaland <faaland1 at llnl.gov>
+Closes #7496
+Closes #7521
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/mmp.c | 3 ++-
+ module/zfs/vdev_disk.c | 24 +++++++++++++-----------
+ 2 files changed, 15 insertions(+), 12 deletions(-)
+
+diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
+index 3b74a6b6..7523310c 100644
+--- a/module/zfs/mmp.c
++++ b/module/zfs/mmp.c
+@@ -607,7 +607,8 @@ param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
+ if (ret < 0)
+ return (ret);
+
+- mmp_signal_all_threads();
++ if (spa_mode_global != 0)
++ mmp_signal_all_threads();
+
+ return (ret);
+ }
+diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
+index d6212835..6761e755 100644
+--- a/module/zfs/vdev_disk.c
++++ b/module/zfs/vdev_disk.c
+@@ -815,19 +815,21 @@ param_set_vdev_scheduler(const char *val, zfs_kernel_param_t *kp)
+ if ((p = strchr(val, '\n')) != NULL)
+ *p = '\0';
+
+- mutex_enter(&spa_namespace_lock);
+- while ((spa = spa_next(spa)) != NULL) {
+- if (spa_state(spa) != POOL_STATE_ACTIVE ||
+- !spa_writeable(spa) || spa_suspended(spa))
+- continue;
+-
+- spa_open_ref(spa, FTAG);
+- mutex_exit(&spa_namespace_lock);
+- vdev_elevator_switch(spa->spa_root_vdev, (char *)val);
++ if (spa_mode_global != 0) {
+ mutex_enter(&spa_namespace_lock);
+- spa_close(spa, FTAG);
++ while ((spa = spa_next(spa)) != NULL) {
++ if (spa_state(spa) != POOL_STATE_ACTIVE ||
++ !spa_writeable(spa) || spa_suspended(spa))
++ continue;
++
++ spa_open_ref(spa, FTAG);
++ mutex_exit(&spa_namespace_lock);
++ vdev_elevator_switch(spa->spa_root_vdev, (char *)val);
++ mutex_enter(&spa_namespace_lock);
++ spa_close(spa, FTAG);
++ }
++ mutex_exit(&spa_namespace_lock);
+ }
+- mutex_exit(&spa_namespace_lock);
+
+ return (param_set_charp(val, kp));
+ }
diff --git a/zfs-patches/0013-Support-Debian-DKMS-builds.patch b/zfs-patches/0013-Support-Debian-DKMS-builds.patch
new file mode 100644
index 0000000..17e7151
--- /dev/null
+++ b/zfs-patches/0013-Support-Debian-DKMS-builds.patch
@@ -0,0 +1,52 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Antonio Russo <antonio.e.russo at gmail.com>
+Date: Sat, 26 May 2018 13:56:24 -0400
+Subject: [PATCH] Support Debian DKMS builds
+
+scripts/dkms.mkconf calls configure with
+`--with-linux=${kernel_source_dir}`, but Debian puts its kernel source at
+`/lib/modules/<version>/source`. This patch adds the same logic to the
+DKMS file produced by `scripts/dkms.mkconf` that Debian has shipped in
+its official ZFS packaging: at DKMS build time, it checks if the system
+is a Debian system, and adjusts the path accordingly.
+
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Signed-off-by: Antonio Russo <antonio.e.russo at gmail.com>
+Closes #7358
+Closes #7540
+Closes #7554
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ scripts/dkms.mkconf | 17 ++++++++++++++++-
+ 1 file changed, 16 insertions(+), 1 deletion(-)
+
+diff --git a/scripts/dkms.mkconf b/scripts/dkms.mkconf
+index 880510ab..88c28938 100755
+--- a/scripts/dkms.mkconf
++++ b/scripts/dkms.mkconf
+@@ -25,7 +25,22 @@ PACKAGE_CONFIG="${pkgcfg}"
+ PRE_BUILD="configure
+ --prefix=/usr
+ --with-config=kernel
+- --with-linux=\${kernel_source_dir}
++ --with-linux=\$(
++ case \`lsb_release -is\` in
++ (Debian|Devuan)
++ if [[ -e \${kernel_source_dir/%build/source} ]]
++ then
++ echo \${kernel_source_dir/%build/source}
++ else
++ # A kpkg exception for Proxmox 2.0
++ echo \${kernel_source_dir}
++ fi
++ ;;
++ (*)
++ echo \${kernel_source_dir}
++ ;;
++ esac
++ )
+ --with-linux-obj=\${kernel_source_dir}
+ --with-spl=\${source_tree}/spl-\${PACKAGE_VERSION}
+ --with-spl-obj=\${dkms_tree}/spl/\${PACKAGE_VERSION}/\${kernelver}/\${arch}
diff --git a/zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch b/zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch
new file mode 100644
index 0000000..f11577f
--- /dev/null
+++ b/zfs-patches/0014-zpool-reopen-should-detect-expanded-devices.patch
@@ -0,0 +1,376 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sara Hartse <sara.hartse at gmail.com>
+Date: Thu, 31 May 2018 10:36:37 -0700
+Subject: [PATCH] zpool reopen should detect expanded devices
+
+Update bdev_capacity to have wholedisk vdevs query the
+size of the underlying block device (correcting for the size
+of the EFI partition and partition alignment) and therefore detect
+expanded space.
+
+Correct vdev_get_stats_ex so that the expandsize is aligned
+to metaslab size and new space is only reported if it is large
+enough for a new metaslab.
+
+Reviewed by: Don Brady <don.brady at delphix.com>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Reviewed by: George Wilson <george.wilson at delphix.com>
+Reviewed-by: Matthew Ahrens <mahrens at delphix.com>
+Reviewed by: John Wren Kennedy <jwk404 at gmail.com>
+Signed-off-by: sara hartse <sara.hartse at delphix.com>
+External-issue: LX-165
+Closes #7546
+Issue #7582
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ include/sys/vdev_disk.h | 12 +++++
+ lib/libefi/rdwr_efi.c | 20 +++++++-
+ lib/libzfs/libzfs_pool.c | 14 +-----
+ module/zfs/vdev.c | 3 +-
+ module/zfs/vdev_disk.c | 46 +++++++++++++-----
+ .../cli_root/zpool_expand/zpool_expand_002_pos.ksh | 54 +++++++++++++++-------
+ 6 files changed, 107 insertions(+), 42 deletions(-)
+
+diff --git a/include/sys/vdev_disk.h b/include/sys/vdev_disk.h
+index 15570b10..b8a32b31 100644
+--- a/include/sys/vdev_disk.h
++++ b/include/sys/vdev_disk.h
+@@ -23,11 +23,23 @@
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1 at llnl.gov>.
+ * LLNL-CODE-403049.
++ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+ #ifndef _SYS_VDEV_DISK_H
+ #define _SYS_VDEV_DISK_H
+
++/*
++ * Don't start the slice at the default block of 34; many storage
++ * devices will use a stripe width of 128k, other vendors prefer a 1m
++ * alignment. It is best to play it safe and ensure a 1m alignment
++ * given 512B blocks. When the block size is larger by a power of 2
++ * we will still be 1m aligned. Some devices are sensitive to the
++ * partition ending alignment as well.
++ */
++#define NEW_START_BLOCK 2048
++#define PARTITION_END_ALIGNMENT 2048
++
+ #ifdef _KERNEL
+ #include <sys/vdev.h>
+
+diff --git a/lib/libefi/rdwr_efi.c b/lib/libefi/rdwr_efi.c
+index 7935047e..19cb17e5 100644
+--- a/lib/libefi/rdwr_efi.c
++++ b/lib/libefi/rdwr_efi.c
+@@ -22,6 +22,7 @@
+ /*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
++ * Copyright (c) 2018 by Delphix. All rights reserved.
+ */
+
+ #include <stdio.h>
+@@ -1153,7 +1154,7 @@ efi_use_whole_disk(int fd)
+
+ /*
+ * Find the last physically non-zero partition.
+- * This is the reserved partition.
++ * This should be the reserved partition.
+ */
+ for (i = 0; i < efi_label->efi_nparts; i ++) {
+ if (resv_start < efi_label->efi_parts[i].p_start) {
+@@ -1163,6 +1164,23 @@ efi_use_whole_disk(int fd)
+ }
+
+ /*
++ * Verify that we've found the reserved partition by checking
++ * that it looks the way it did when we created it in zpool_label_disk.
++ * If we've found the incorrect partition, then we know that this
++ * device was reformatted and no longer is solely used by ZFS.
++ */
++ if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) ||
++ (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) ||
++ (resv_index != 8)) {
++ if (efi_debug) {
++ (void) fprintf(stderr,
++ "efi_use_whole_disk: wholedisk not available\n");
++ }
++ efi_free(efi_label);
++ return (VT_ENOSPC);
++ }
++
++ /*
+ * Find the last physically non-zero partition before that.
+ * This is the data partition.
+ */
+diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
+index e00d5f51..53bc5034 100644
+--- a/lib/libzfs/libzfs_pool.c
++++ b/lib/libzfs/libzfs_pool.c
+@@ -22,7 +22,7 @@
+ /*
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
++ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov at gmail.com>
+ * Copyright (c) 2017 Datto Inc.
+ */
+@@ -42,6 +42,7 @@
+ #include <sys/efi_partition.h>
+ #include <sys/vtoc.h>
+ #include <sys/zfs_ioctl.h>
++#include <sys/vdev_disk.h>
+ #include <dlfcn.h>
+
+ #include "zfs_namecheck.h"
+@@ -913,17 +914,6 @@ zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
+ }
+
+ /*
+- * Don't start the slice at the default block of 34; many storage
+- * devices will use a stripe width of 128k, other vendors prefer a 1m
+- * alignment. It is best to play it safe and ensure a 1m alignment
+- * given 512B blocks. When the block size is larger by a power of 2
+- * we will still be 1m aligned. Some devices are sensitive to the
+- * partition ending alignment as well.
+- */
+-#define NEW_START_BLOCK 2048
+-#define PARTITION_END_ALIGNMENT 2048
+-
+-/*
+ * Validate the given pool name, optionally putting an extended error message in
+ * 'buf'.
+ */
+diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
+index acac2a97..b643bd35 100644
+--- a/module/zfs/vdev.c
++++ b/module/zfs/vdev.c
+@@ -21,7 +21,7 @@
+
+ /*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
++ * Copyright (c) 2011, 2018 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Toomas Soome <tsoome at me.com>
+@@ -3039,7 +3039,6 @@ vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
+ vd->vdev_max_asize - vd->vdev_asize,
+ 1ULL << tvd->vdev_ms_shift);
+ }
+- vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
+ if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
+ !vd->vdev_ishole) {
+ vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
+diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
+index 6761e755..6dc0544f 100644
+--- a/module/zfs/vdev_disk.c
++++ b/module/zfs/vdev_disk.c
+@@ -23,7 +23,7 @@
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Rewritten for Linux by Brian Behlendorf <behlendorf1 at llnl.gov>.
+ * LLNL-CODE-403049.
+- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
++ * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ */
+
+ #include <sys/zfs_context.h>
+@@ -35,10 +35,14 @@
+ #include <sys/zio.h>
+ #include <sys/sunldi.h>
+ #include <linux/mod_compat.h>
++#include <linux/msdos_fs.h>
+
+ char *zfs_vdev_scheduler = VDEV_SCHEDULER;
+ static void *zfs_vdev_holder = VDEV_HOLDER;
+
++/* size of the "reserved" partition, in blocks */
++#define EFI_MIN_RESV_SIZE (16 * 1024)
++
+ /*
+ * Virtual device vector for disks.
+ */
+@@ -82,17 +86,39 @@ vdev_bdev_mode(int smode)
+ }
+ #endif /* HAVE_OPEN_BDEV_EXCLUSIVE */
+
++/* The capacity (in bytes) of a bdev that is available to be used by a vdev */
+ static uint64_t
+-bdev_capacity(struct block_device *bdev)
++bdev_capacity(struct block_device *bdev, boolean_t wholedisk)
+ {
+ struct hd_struct *part = bdev->bd_part;
++ uint64_t sectors = get_capacity(bdev->bd_disk);
++ /* If there are no partitions, return the entire device capacity */
++ if (part == NULL)
++ return (sectors << SECTOR_BITS);
+
+- /* The partition capacity referenced by the block device */
+- if (part)
+- return (part->nr_sects << 9);
+-
+- /* Otherwise assume the full device capacity */
+- return (get_capacity(bdev->bd_disk) << 9);
++ /*
++ * If there are partitions, decide if we are using a `wholedisk`
++ * layout (composed of part1 and part9) or just a single partition.
++ */
++ if (wholedisk) {
++ /* Verify the expected device layout */
++ ASSERT3P(bdev, !=, bdev->bd_contains);
++ /*
++ * Sectors used by the EFI partition (part9) as well as
++ * partition alignment.
++ */
++ uint64_t used = EFI_MIN_RESV_SIZE + NEW_START_BLOCK +
++ PARTITION_END_ALIGNMENT;
++
++ /* Space available to the vdev, i.e. the size of part1 */
++ if (sectors <= used)
++ return (0);
++ uint64_t available = sectors - used;
++ return (available << SECTOR_BITS);
++ } else {
++ /* The partition capacity referenced by the block device */
++ return (part->nr_sects << SECTOR_BITS);
++ }
+ }
+
+ static void
+@@ -328,9 +354,7 @@ skip_open:
+ v->vdev_nonrot = blk_queue_nonrot(bdev_get_queue(vd->vd_bdev));
+
+ /* Physical volume size in bytes */
+- *psize = bdev_capacity(vd->vd_bdev);
+-
+- /* TODO: report possible expansion size */
++ *psize = bdev_capacity(vd->vd_bdev, v->vdev_wholedisk);
+ *max_psize = *psize;
+
+ /* Based on the minimum sector size set the block size */
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
+index d578ae60..66b6969d 100755
+--- a/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
++++ b/tests/zfs-tests/tests/functional/cli_root/zpool_expand/zpool_expand_002_pos.ksh
+@@ -26,7 +26,7 @@
+ #
+
+ #
+-# Copyright (c) 2012, 2016 by Delphix. All rights reserved.
++# Copyright (c) 2012, 2018 by Delphix. All rights reserved.
+ # Copyright (c) 2017 Lawrence Livermore National Security, LLC.
+ #
+
+@@ -43,8 +43,9 @@
+ # 1) Create 3 files
+ # 2) Create a pool backed by the files
+ # 3) Expand the files' size with truncate
+-# 4) Use zpool online -e to online the vdevs
+-# 5) Check that the pool size was expanded
++# 4) Use zpool reopen to check the expandsize
++# 5) Use zpool online -e to online the vdevs
++# 6) Check that the pool size was expanded
+ #
+
+ verify_runnable "global"
+@@ -64,8 +65,8 @@ log_onexit cleanup
+
+ log_assert "zpool can expand after zpool online -e zvol vdevs on LUN expansion"
+
+-
+ for type in " " mirror raidz raidz2; do
++ # Initialize the file devices and the pool
+ for i in 1 2 3; do
+ log_must truncate -s $org_size ${TEMPFILE}.$i
+ done
+@@ -80,13 +81,35 @@ for type in " " mirror raidz raidz2; do
+ "$autoexp"
+ fi
+ typeset prev_size=$(get_pool_prop size $TESTPOOL1)
+- typeset zfs_prev_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
+- awk '{print $3}')
++ typeset zfs_prev_size=$(get_prop avail $TESTPOOL1)
+
++ # Increase the size of the file devices
+ for i in 1 2 3; do
+ log_must truncate -s $exp_size ${TEMPFILE}.$i
+ done
+
++ # Reopen the pool and check that the `expandsize` property is set
++ log_must zpool reopen $TESTPOOL1
++ typeset zpool_expandsize=$(get_pool_prop expandsize $TESTPOOL1)
++
++ if [[ $type == "mirror" ]]; then
++ typeset expected_zpool_expandsize=$(($exp_size-$org_size))
++ else
++ typeset expected_zpool_expandsize=$((3*($exp_size-$org_size)))
++ fi
++
++ if [[ "$zpool_expandsize" = "-" ]]; then
++ log_fail "pool $TESTPOOL1 did not detect any " \
++ "expandsize after reopen"
++ fi
++
++ if [[ $zpool_expandsize -ne $expected_zpool_expandsize ]]; then
++ log_fail "pool $TESTPOOL1 did not detect correct " \
++ "expandsize after reopen: found $zpool_expandsize," \
++ "expected $expected_zpool_expandsize"
++ fi
++
++ # Online the devices to add the new space to the pool
+ for i in 1 2 3; do
+ log_must zpool online -e $TESTPOOL1 ${TEMPFILE}.$i
+ done
+@@ -96,8 +119,7 @@ for type in " " mirror raidz raidz2; do
+ sync
+
+ typeset expand_size=$(get_pool_prop size $TESTPOOL1)
+- typeset zfs_expand_size=$(zfs get -p avail $TESTPOOL1 | tail -1 | \
+- awk '{print $3}')
++ typeset zfs_expand_size=$(get_prop avail $TESTPOOL1)
+ log_note "$TESTPOOL1 $type has previous size: $prev_size and " \
+ "expanded size: $expand_size"
+
+@@ -112,8 +134,8 @@ for type in " " mirror raidz raidz2; do
+ grep "(+${expansion_size}" | wc -l)
+
+ if [[ $size_addition -ne $i ]]; then
+- log_fail "pool $TESTPOOL1 is not autoexpand " \
+- "after LUN expansion"
++ log_fail "pool $TESTPOOL1 did not expand " \
++ "after LUN expansion and zpool online -e"
+ fi
+ elif [[ $type == "mirror" ]]; then
+ typeset expansion_size=$(($exp_size-$org_size))
+@@ -123,8 +145,8 @@ for type in " " mirror raidz raidz2; do
+ grep "(+${expansion_size})" >/dev/null 2>&1
+
+ if [[ $? -ne 0 ]]; then
+- log_fail "pool $TESTPOOL1 is not autoexpand " \
+- "after LUN expansion"
++ log_fail "pool $TESTPOOL1 did not expand " \
++ "after LUN expansion and zpool online -e"
+ fi
+ else
+ typeset expansion_size=$((3*($exp_size-$org_size)))
+@@ -134,13 +156,13 @@ for type in " " mirror raidz raidz2; do
+ grep "(+${expansion_size})" >/dev/null 2>&1
+
+ if [[ $? -ne 0 ]] ; then
+- log_fail "pool $TESTPOOL1 is not autoexpand " \
+- "after LUN expansion"
++ log_fail "pool $TESTPOOL1 did not expand " \
++ "after LUN expansion and zpool online -e"
+ fi
+ fi
+ else
+- log_fail "pool $TESTPOOL1 is not autoexpanded after LUN " \
+- "expansion"
++ log_fail "pool $TESTPOOL1 did not expand after LUN expansion " \
++ "and zpool online -e"
+ fi
+ log_must zpool destroy $TESTPOOL1
+ done
diff --git a/zfs-patches/0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch b/zfs-patches/0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch
new file mode 100644
index 0000000..e1e0b9d
--- /dev/null
+++ b/zfs-patches/0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch
@@ -0,0 +1,686 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tony Hutter <hutter2 at llnl.gov>
+Date: Wed, 6 Jun 2018 09:33:54 -0700
+Subject: [PATCH] Add pool state /proc entry, "SUSPENDED" pools
+
+1. Add a proc entry to display the pool's state:
+
+$ cat /proc/spl/kstat/zfs/tank/state
+ONLINE
+
+This is done without using the spa config locks, so it will
+never hang.
+
+2. Fix 'zpool status' and 'zpool list -o health' output to print
+"SUSPENDED" instead of "ONLINE" for suspended pools.
+
+Reviewed-by: Olaf Faaland <faaland1 at llnl.gov>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Reviewed by: Richard Elling <Richard.Elling at RichardElling.com>
+Signed-off-by: Tony Hutter <hutter2 at llnl.gov>
+Closes #7331
+Closes #7563
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ cmd/zpool/zpool_main.c | 3 +-
+ configure.ac | 1 +
+ include/libzfs.h | 2 +
+ include/sys/spa.h | 3 +
+ lib/libspl/include/sys/kstat.h | 2 +
+ lib/libzfs/libzfs_pool.c | 46 +++++--
+ lib/libzfs/libzfs_status.c | 12 +-
+ module/zfs/spa_misc.c | 40 ++++++
+ module/zfs/spa_stats.c | 62 +++++++++
+ tests/runfiles/linux.run | 4 +
+ tests/zfs-tests/include/libtest.shlib | 38 ++++++
+ tests/zfs-tests/tests/functional/Makefile.am | 1 +
+ tests/zfs-tests/tests/functional/kstat/Makefile.am | 5 +
+ tests/zfs-tests/tests/functional/kstat/cleanup.ksh | 28 ++++
+ tests/zfs-tests/tests/functional/kstat/setup.ksh | 34 +++++
+ tests/zfs-tests/tests/functional/kstat/state.ksh | 144 +++++++++++++++++++++
+ 16 files changed, 406 insertions(+), 19 deletions(-)
+ create mode 100644 tests/zfs-tests/tests/functional/kstat/Makefile.am
+ create mode 100755 tests/zfs-tests/tests/functional/kstat/cleanup.ksh
+ create mode 100755 tests/zfs-tests/tests/functional/kstat/setup.ksh
+ create mode 100755 tests/zfs-tests/tests/functional/kstat/state.ksh
+
+diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
+index b0756938..97697011 100644
+--- a/cmd/zpool/zpool_main.c
++++ b/cmd/zpool/zpool_main.c
+@@ -6226,7 +6226,8 @@ status_callback(zpool_handle_t *zhp, void *data)
+ &nvroot) == 0);
+ verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t **)&vs, &c) == 0);
+- health = zpool_state_to_name(vs->vs_state, vs->vs_aux);
++
++ health = zpool_get_state_str(zhp);
+
+ (void) printf(gettext(" pool: %s\n"), zpool_get_name(zhp));
+ (void) printf(gettext(" state: %s\n"), health);
+diff --git a/configure.ac b/configure.ac
+index 3f4925c3..42cfc1a3 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -253,6 +253,7 @@ AC_CONFIG_FILES([
+ tests/zfs-tests/tests/functional/history/Makefile
+ tests/zfs-tests/tests/functional/inheritance/Makefile
+ tests/zfs-tests/tests/functional/inuse/Makefile
++ tests/zfs-tests/tests/functional/kstat/Makefile
+ tests/zfs-tests/tests/functional/large_files/Makefile
+ tests/zfs-tests/tests/functional/largest_pool/Makefile
+ tests/zfs-tests/tests/functional/link_count/Makefile
+diff --git a/include/libzfs.h b/include/libzfs.h
+index 945bd5b8..fea2fee4 100644
+--- a/include/libzfs.h
++++ b/include/libzfs.h
+@@ -296,6 +296,8 @@ int zfs_dev_is_whole_disk(char *dev_name);
+ char *zfs_get_underlying_path(char *dev_name);
+ char *zfs_get_enclosure_sysfs_path(char *dev_name);
+
++const char *zpool_get_state_str(zpool_handle_t *);
++
+ /*
+ * Functions to manage pool properties
+ */
+diff --git a/include/sys/spa.h b/include/sys/spa.h
+index 3b268419..810999c9 100644
+--- a/include/sys/spa.h
++++ b/include/sys/spa.h
+@@ -730,6 +730,7 @@ typedef struct spa_stats {
+ spa_stats_history_t tx_assign_histogram;
+ spa_stats_history_t io_history;
+ spa_stats_history_t mmp_history;
++ spa_stats_history_t state; /* pool state */
+ } spa_stats_t;
+
+ typedef enum txg_state {
+@@ -889,6 +890,8 @@ extern void spa_history_log_internal_ds(struct dsl_dataset *ds, const char *op,
+ extern void spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
+ dmu_tx_t *tx, const char *fmt, ...);
+
++extern const char *spa_state_to_name(spa_t *spa);
++
+ /* error handling */
+ struct zbookmark_phys;
+ extern void spa_log_error(spa_t *spa, zio_t *zio);
+diff --git a/lib/libspl/include/sys/kstat.h b/lib/libspl/include/sys/kstat.h
+index fcd3ed98..84c3d7ca 100644
+--- a/lib/libspl/include/sys/kstat.h
++++ b/lib/libspl/include/sys/kstat.h
+@@ -304,6 +304,8 @@ typedef struct kstat32 {
+ #define KSTAT_FLAG_PERSISTENT 0x08
+ #define KSTAT_FLAG_DORMANT 0x10
+ #define KSTAT_FLAG_INVALID 0x20
++#define KSTAT_FLAG_LONGSTRINGS 0x40
++#define KSTAT_FLAG_NO_HEADERS 0x80
+
+ /*
+ * Dynamic update support
+diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
+index 53bc5034..315ba954 100644
+--- a/lib/libzfs/libzfs_pool.c
++++ b/lib/libzfs/libzfs_pool.c
+@@ -240,6 +240,38 @@ zpool_pool_state_to_name(pool_state_t state)
+ }
+
+ /*
++ * Given a pool handle, return the pool health string ("ONLINE", "DEGRADED",
++ * "SUSPENDED", etc).
++ */
++const char *
++zpool_get_state_str(zpool_handle_t *zhp)
++{
++ zpool_errata_t errata;
++ zpool_status_t status;
++ nvlist_t *nvroot;
++ vdev_stat_t *vs;
++ uint_t vsc;
++ const char *str;
++
++ status = zpool_get_status(zhp, NULL, &errata);
++
++ if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
++ str = gettext("FAULTED");
++ } else if (status == ZPOOL_STATUS_IO_FAILURE_WAIT ||
++ status == ZPOOL_STATUS_IO_FAILURE_MMP) {
++ str = gettext("SUSPENDED");
++ } else {
++ verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
++ ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
++ verify(nvlist_lookup_uint64_array(nvroot,
++ ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
++ == 0);
++ str = zpool_state_to_name(vs->vs_state, vs->vs_aux);
++ }
++ return (str);
++}
++
++/*
+ * Get a zpool property value for 'prop' and return the value in
+ * a pre-allocated buffer.
+ */
+@@ -250,9 +282,6 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
+ uint64_t intval;
+ const char *strval;
+ zprop_source_t src = ZPROP_SRC_NONE;
+- nvlist_t *nvroot;
+- vdev_stat_t *vs;
+- uint_t vsc;
+
+ if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
+ switch (prop) {
+@@ -261,7 +290,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
+ break;
+
+ case ZPOOL_PROP_HEALTH:
+- (void) strlcpy(buf, "FAULTED", len);
++ (void) strlcpy(buf, zpool_get_state_str(zhp), len);
+ break;
+
+ case ZPOOL_PROP_GUID:
+@@ -362,14 +391,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf,
+ break;
+
+ case ZPOOL_PROP_HEALTH:
+- verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
+- ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
+- verify(nvlist_lookup_uint64_array(nvroot,
+- ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
+- == 0);
+-
+- (void) strlcpy(buf, zpool_state_to_name(intval,
+- vs->vs_aux), len);
++ (void) strlcpy(buf, zpool_get_state_str(zhp), len);
+ break;
+ case ZPOOL_PROP_VERSION:
+ if (intval >= SPA_VERSION_FEATURES) {
+diff --git a/lib/libzfs/libzfs_status.c b/lib/libzfs/libzfs_status.c
+index 6cdcd382..5e423f3a 100644
+--- a/lib/libzfs/libzfs_status.c
++++ b/lib/libzfs/libzfs_status.c
+@@ -403,12 +403,12 @@ zpool_status_t
+ zpool_get_status(zpool_handle_t *zhp, char **msgid, zpool_errata_t *errata)
+ {
+ zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE, errata);
+-
+- if (ret >= NMSGID)
+- *msgid = NULL;
+- else
+- *msgid = zfs_msgid_table[ret];
+-
++ if (msgid != NULL) {
++ if (ret >= NMSGID)
++ *msgid = NULL;
++ else
++ *msgid = zfs_msgid_table[ret];
++ }
+ return (ret);
+ }
+
+diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
+index e92c3948..cc1c641d 100644
+--- a/module/zfs/spa_misc.c
++++ b/module/zfs/spa_misc.c
+@@ -2100,6 +2100,45 @@ spa_get_hostid(void)
+ return (myhostid);
+ }
+
++/*
++ * Return the pool state string ("ONLINE", "DEGRADED", "SUSPENDED", etc).
++ */
++const char *
++spa_state_to_name(spa_t *spa)
++{
++ vdev_state_t state = spa->spa_root_vdev->vdev_state;
++ vdev_aux_t aux = spa->spa_root_vdev->vdev_stat.vs_aux;
++
++ if (spa_suspended(spa) &&
++ (spa_get_failmode(spa) != ZIO_FAILURE_MODE_CONTINUE))
++ return ("SUSPENDED");
++
++ switch (state) {
++ case VDEV_STATE_CLOSED:
++ case VDEV_STATE_OFFLINE:
++ return ("OFFLINE");
++ case VDEV_STATE_REMOVED:
++ return ("REMOVED");
++ case VDEV_STATE_CANT_OPEN:
++ if (aux == VDEV_AUX_CORRUPT_DATA || aux == VDEV_AUX_BAD_LOG)
++ return ("FAULTED");
++ else if (aux == VDEV_AUX_SPLIT_POOL)
++ return ("SPLIT");
++ else
++ return ("UNAVAIL");
++ case VDEV_STATE_FAULTED:
++ return ("FAULTED");
++ case VDEV_STATE_DEGRADED:
++ return ("DEGRADED");
++ case VDEV_STATE_HEALTHY:
++ return ("ONLINE");
++ default:
++ break;
++ }
++
++ return ("UNKNOWN");
++}
++
+ #if defined(_KERNEL) && defined(HAVE_SPL)
+ /* Namespace manipulation */
+ EXPORT_SYMBOL(spa_lookup);
+@@ -2178,6 +2217,7 @@ EXPORT_SYMBOL(spa_is_root);
+ EXPORT_SYMBOL(spa_writeable);
+ EXPORT_SYMBOL(spa_mode);
+ EXPORT_SYMBOL(spa_namespace_lock);
++EXPORT_SYMBOL(spa_state_to_name);
+
+ /* BEGIN CSTYLED */
+ module_param(zfs_flags, uint, 0644);
+diff --git a/module/zfs/spa_stats.c b/module/zfs/spa_stats.c
+index 8950d9c5..ca3d0be7 100644
+--- a/module/zfs/spa_stats.c
++++ b/module/zfs/spa_stats.c
+@@ -22,6 +22,8 @@
+ #include <sys/zfs_context.h>
+ #include <sys/spa_impl.h>
+ #include <sys/vdev_impl.h>
++#include <sys/spa.h>
++#include <zfs_comutil.h>
+
+ /*
+ * Keeps stats on last N reads per spa_t, disabled by default.
+@@ -992,6 +994,64 @@ spa_mmp_history_add(spa_t *spa, uint64_t txg, uint64_t timestamp,
+ return ((void *)smh);
+ }
+
++static void *
++spa_state_addr(kstat_t *ksp, loff_t n)
++{
++ return (ksp->ks_private); /* return the spa_t */
++}
++
++static int
++spa_state_data(char *buf, size_t size, void *data)
++{
++ spa_t *spa = (spa_t *)data;
++ (void) snprintf(buf, size, "%s\n", spa_state_to_name(spa));
++ return (0);
++}
++
++/*
++ * Return the state of the pool in /proc/spl/kstat/zfs/<pool>/state.
++ *
++ * This is a lock-less read of the pool's state (unlike using 'zpool', which
++ * can potentially block for seconds). Because it doesn't block, it can be
++ * useful as a pool heartbeat value.
++ */
++static void
++spa_state_init(spa_t *spa)
++{
++ spa_stats_history_t *ssh = &spa->spa_stats.state;
++ char *name;
++ kstat_t *ksp;
++
++ mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
++
++ name = kmem_asprintf("zfs/%s", spa_name(spa));
++ ksp = kstat_create(name, 0, "state", "misc",
++ KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
++
++ ssh->kstat = ksp;
++ if (ksp) {
++ ksp->ks_lock = &ssh->lock;
++ ksp->ks_data = NULL;
++ ksp->ks_private = spa;
++ ksp->ks_flags |= KSTAT_FLAG_NO_HEADERS;
++ kstat_set_raw_ops(ksp, NULL, spa_state_data, spa_state_addr);
++ kstat_install(ksp);
++ }
++
++ strfree(name);
++}
++
++static void
++spa_health_destroy(spa_t *spa)
++{
++ spa_stats_history_t *ssh = &spa->spa_stats.state;
++ kstat_t *ksp = ssh->kstat;
++ if (ksp)
++ kstat_delete(ksp);
++
++ mutex_destroy(&ssh->lock);
++}
++
+ void
+ spa_stats_init(spa_t *spa)
+ {
+@@ -1000,11 +1060,13 @@ spa_stats_init(spa_t *spa)
+ spa_tx_assign_init(spa);
+ spa_io_history_init(spa);
+ spa_mmp_history_init(spa);
++ spa_state_init(spa);
+ }
+
+ void
+ spa_stats_destroy(spa_t *spa)
+ {
++ spa_health_destroy(spa);
+ spa_tx_assign_destroy(spa);
+ spa_txg_history_destroy(spa);
+ spa_read_history_destroy(spa);
+diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
+index 379c9f73..69e9eb26 100644
+--- a/tests/runfiles/linux.run
++++ b/tests/runfiles/linux.run
+@@ -467,6 +467,10 @@ tests = ['inuse_001_pos', 'inuse_003_pos', 'inuse_004_pos',
+ post =
+ tags = ['functional', 'inuse']
+
++[tests/functional/kstat]
++tests = ['state']
++tags = ['functional', 'kstat']
++
+ [tests/functional/large_files]
+ tests = ['large_files_001_pos', 'large_files_002_pos']
+ tags = ['functional', 'large_files']
+diff --git a/tests/zfs-tests/include/libtest.shlib b/tests/zfs-tests/include/libtest.shlib
+index 13c85912..86dae6ea 100644
+--- a/tests/zfs-tests/include/libtest.shlib
++++ b/tests/zfs-tests/include/libtest.shlib
+@@ -26,6 +26,7 @@
+ # Copyright 2016 Nexenta Systems, Inc.
+ # Copyright (c) 2017 Lawrence Livermore National Security, LLC.
+ # Copyright (c) 2017 Datto Inc.
++# Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
+ #
+
+ . ${STF_TOOLS}/include/logapi.shlib
+@@ -3718,3 +3719,40 @@ function get_pool_devices #testpool #devdir
+ fi
+ echo $out
+ }
++
++#
++# Get scsi_debug device name.
++# Returns basename of scsi_debug device (for example "sdb").
++#
++function get_debug_device
++{
++ for i in {1..10} ; do
++ val=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3)
++
++ # lsscsi can take time to settle
++ if [ "$val" != "-" ] ; then
++ break
++ fi
++ sleep 1
++ done
++ echo "$val"
++}
++
++#
++# Returns SCSI host number for the given disk
++#
++function get_scsi_host #disk
++{
++ typeset disk=$1
++ ls /sys/block/${disk}/device/scsi_device | cut -d : -f 1
++}
++
++#
++# Simulate disk removal
++#
++function remove_disk #disk
++{
++ typeset disk=$1
++ on_off_disk $disk "offline"
++ block_device_wait
++}
+diff --git a/tests/zfs-tests/tests/functional/Makefile.am b/tests/zfs-tests/tests/functional/Makefile.am
+index ea52205a..bbbf3ba0 100644
+--- a/tests/zfs-tests/tests/functional/Makefile.am
++++ b/tests/zfs-tests/tests/functional/Makefile.am
+@@ -24,6 +24,7 @@ SUBDIRS = \
+ history \
+ inheritance \
+ inuse \
++ kstat \
+ large_files \
+ largest_pool \
+ libzfs \
+diff --git a/tests/zfs-tests/tests/functional/kstat/Makefile.am b/tests/zfs-tests/tests/functional/kstat/Makefile.am
+new file mode 100644
+index 00000000..8ad83ec3
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/kstat/Makefile.am
+@@ -0,0 +1,5 @@
++pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/kstat
++dist_pkgdata_SCRIPTS = \
++ setup.ksh \
++ cleanup.ksh \
++ state.ksh
+diff --git a/tests/zfs-tests/tests/functional/kstat/cleanup.ksh b/tests/zfs-tests/tests/functional/kstat/cleanup.ksh
+new file mode 100755
+index 00000000..8a212ce3
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/kstat/cleanup.ksh
+@@ -0,0 +1,28 @@
++#!/bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or http://www.opensolaris.org/os/licensing.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++#
++#
++# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
++#
++
++. $STF_SUITE/include/libtest.shlib
++
++default_cleanup
+diff --git a/tests/zfs-tests/tests/functional/kstat/setup.ksh b/tests/zfs-tests/tests/functional/kstat/setup.ksh
+new file mode 100755
+index 00000000..57717a09
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/kstat/setup.ksh
+@@ -0,0 +1,34 @@
++#!/bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or http://www.opensolaris.org/os/licensing.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++#
++#
++# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
++#
++
++. $STF_SUITE/include/libtest.shlib
++
++if ! is_linux ; then
++ log_unsupported "/proc/spl/kstat/<pool>/health only supported on Linux"
++fi
++
++default_mirror_setup $DISKS
++
++log_pass
+diff --git a/tests/zfs-tests/tests/functional/kstat/state.ksh b/tests/zfs-tests/tests/functional/kstat/state.ksh
+new file mode 100755
+index 00000000..bf0b6e31
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/kstat/state.ksh
+@@ -0,0 +1,144 @@
++#!/bin/ksh -p
++#
++# CDDL HEADER START
++#
++# The contents of this file are subject to the terms of the
++# Common Development and Distribution License (the "License").
++# You may not use this file except in compliance with the License.
++#
++# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
++# or http://www.opensolaris.org/os/licensing.
++# See the License for the specific language governing permissions
++# and limitations under the License.
++#
++# When distributing Covered Code, include this CDDL HEADER in each
++# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
++# If applicable, add the following below this CDDL HEADER, with the
++# fields enclosed by brackets "[]" replaced with your own identifying
++# information: Portions Copyright [yyyy] [name of copyright owner]
++#
++# CDDL HEADER END
++
++#
++# Copyright (c) 2018 by Lawrence Livermore National Security, LLC.
++#
++
++#
++# DESCRIPTION:
++# Test /proc/spl/kstat/zfs/<pool>/state kstat
++#
++# STRATEGY:
++# 1. Create a mirrored pool
++# 2. Check that pool is ONLINE
++# 3. Fault one disk
++# 4. Check that pool is DEGRADED
++# 5. Create a new pool with a single scsi_debug disk
++# 6. Remove the disk
++# 7. Check that pool is SUSPENDED
++# 8. Add the disk back in
++# 9. Clear errors and destroy the pools
++
++. $STF_SUITE/include/libtest.shlib
++
++verify_runnable "both"
++
++function cleanup
++{
++ # Destroy the scsi_debug pool
++ if [ -n "$TESTPOOL2" ] ; then
++ if [ -n "$host" ] ; then
++ # Re-enable the disk
++ scan_scsi_hosts $host
++
++ # Device may have changed names after being inserted
++ SDISK=$(get_debug_device)
++ log_must ln $DEV_RDSKDIR/$SDISK $REALDISK
++ fi
++
++ # Restore our working pool image
++ if [ -n "$BACKUP" ] ; then
++ gunzip -c $BACKUP > $REALDISK
++ log_must rm -f $BACKUP
++ fi
++
++ # Our disk is back. Now we can clear errors and destroy the
++ # pool cleanly.
++ log_must zpool clear $TESTPOOL2
++
++ # Now that the disk is back and errors cleared, wait for our
++ # hung 'zpool scrub' to finish.
++ wait
++
++ destroy_pool $TESTPOOL2
++ log_must rm $REALDISK
++ unload_scsi_debug
++ fi
++}
++
++# Check that our pool state values match what's expected
++#
++# $1: pool name
++# $2: expected state ("ONLINE", "DEGRADED", "SUSPENDED", etc)
++function check_all
++{
++ pool=$1
++ expected=$2
++
++ state1=$(zpool status $pool | awk '/state: /{print $2}');
++ state2=$(zpool list -H -o health $pool)
++ state3=$(cat /proc/spl/kstat/zfs/$pool/state)
++ log_note "Checking $expected = $state1 = $state2 = $state3"
++ if [[ "$expected" == "$state1" && "$expected" == "$state2" && \
++ "$expected" == "$state3" ]] ; then
++ true
++ else
++ false
++ fi
++}
++
++log_onexit cleanup
++
++log_assert "Testing /proc/spl/kstat/zfs/<pool>/state kstat"
++
++# Test that the initial pool is healthy
++check_all $TESTPOOL "ONLINE"
++
++# Fault one of the disks, and check that pool is degraded
++DISK1=$(echo "$DISKS" | awk '{print $2}')
++zpool offline -tf $TESTPOOL $DISK1
++check_all $TESTPOOL "DEGRADED"
++
++# Create a new pool out of a scsi_debug disk
++TESTPOOL2=testpool2
++MINVDEVSIZE_MB=$((MINVDEVSIZE / 1048576))
++load_scsi_debug $MINVDEVSIZE_MB 1 1 1 '512b'
++
++SDISK=$(get_debug_device)
++host=$(get_scsi_host $SDISK)
++
++# Use $REALDISK instead of $SDISK in our pool because $SDISK can change names
++# as we remove/add the disk (i.e. /dev/sdf -> /dev/sdg).
++REALDISK=/dev/kstat-state-realdisk
++log_must [ ! -e $REALDISK ]
++ln $DEV_RDSKDIR/$SDISK $REALDISK
++
++log_must zpool create $TESTPOOL2 $REALDISK
++
++# Backup the contents of the disk image
++BACKUP=/tmp/kstat-state-realdisk.gz
++log_must [ ! -e $BACKUP ]
++gzip -c $REALDISK > $BACKUP
++
++# Yank out the disk from under the pool
++log_must rm $REALDISK
++remove_disk $SDISK
++
++# Run a 'zpool scrub' in the background to suspend the pool. We run it in the
++# background since the command will hang when the pool gets suspended. The
++# command will resume and exit after we restore the missing disk later on.
++zpool scrub $TESTPOOL2 &
++sleep 1 # Give the scrub some time to run before we check if it fails
++
++log_must check_all $TESTPOOL2 "SUSPENDED"
++
++log_pass "/proc/spl/kstat/zfs/<pool>/state test successful"
diff --git a/zfs-patches/0016-Linux-4.14-compat-blk_queue_stackable.patch b/zfs-patches/0016-Linux-4.14-compat-blk_queue_stackable.patch
new file mode 100644
index 0000000..82219a7
--- /dev/null
+++ b/zfs-patches/0016-Linux-4.14-compat-blk_queue_stackable.patch
@@ -0,0 +1,115 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1 at llnl.gov>
+Date: Tue, 19 Jun 2018 21:52:45 -0700
+Subject: [PATCH] Linux 4.14 compat: blk_queue_stackable()
+
+The blk_queue_stackable() function was replaced in the 4.14 kernel
+by queue_is_rq_based(), commit torvalds/linux at 5fdee212. This change
+resulted in the default elevator being used which can negatively
+impact performance.
+
+Rather than adding additional compatibility code to detect the
+new interface, unconditionally attempt to set the elevator. Since
+we expect this to fail for block devices without an elevator, the
+error message has been moved into zfs_dbgmsg().
+
+Finally, it was observed that the elevator_change() was removed
+from the 4.12 kernel, commit torvalds/linux at c033269. Update the
+comment to clearly specify which kernels are expected to export the
+elevator_change() symbol.
+
+Reviewed-by: Matthew Ahrens <mahrens at delphix.com>
+Reviewed-by: Tony Hutter <hutter2 at llnl.gov>
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Closes #7645
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ config/kernel-elevator-change.m4 | 4 ++--
+ include/linux/blkdev_compat.h | 11 -----------
+ module/zfs/vdev_disk.c | 22 ++++++++++------------
+ 3 files changed, 12 insertions(+), 25 deletions(-)
+
+diff --git a/config/kernel-elevator-change.m4 b/config/kernel-elevator-change.m4
+index ace5aa82..eba25257 100644
+--- a/config/kernel-elevator-change.m4
++++ b/config/kernel-elevator-change.m4
+@@ -1,6 +1,6 @@
+ dnl #
+-dnl # 2.6.36 API change
+-dnl # Verify the elevator_change() symbol is available.
++dnl # 2.6.36 API, exported elevator_change() symbol
++dnl # 4.12 API, removed elevator_change() symbol
+ dnl #
+ AC_DEFUN([ZFS_AC_KERNEL_ELEVATOR_CHANGE], [
+ AC_MSG_CHECKING([whether elevator_change() is available])
+diff --git a/include/linux/blkdev_compat.h b/include/linux/blkdev_compat.h
+index 27f05662..c8cdf38e 100644
+--- a/include/linux/blkdev_compat.h
++++ b/include/linux/blkdev_compat.h
+@@ -106,17 +106,6 @@ blk_queue_set_write_cache(struct request_queue *q, bool wc, bool fua)
+ #endif
+
+ /*
+- * 2.6.27 API change,
+- * The blk_queue_stackable() queue flag was added in 2.6.27 to handle dm
+- * stacking drivers. Prior to this request stacking drivers were detected
+- * by checking (q->request_fn == NULL), for earlier kernels we revert to
+- * this legacy behavior.
+- */
+-#ifndef blk_queue_stackable
+-#define blk_queue_stackable(q) ((q)->request_fn == NULL)
+-#endif
+-
+-/*
+ * 2.6.34 API change,
+ * The blk_queue_max_hw_sectors() function replaces blk_queue_max_sectors().
+ */
+diff --git a/module/zfs/vdev_disk.c b/module/zfs/vdev_disk.c
+index 6dc0544f..c5708cb2 100644
+--- a/module/zfs/vdev_disk.c
++++ b/module/zfs/vdev_disk.c
+@@ -168,23 +168,20 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
+ if (!v->vdev_wholedisk && strncmp(device, "dm-", 3) != 0)
+ return;
+
+- /* Skip devices without schedulers (loop, ram, dm, etc) */
+- if (!q->elevator || !blk_queue_stackable(q))
+- return;
+-
+ /* Leave existing scheduler when set to "none" */
+ if ((strncmp(elevator, "none", 4) == 0) && (strlen(elevator) == 4))
+ return;
+
++ /*
++ * The elevator_change() function was available in kernels from
++ * 2.6.36 to 4.11. When not available fall back to using the user
++ * mode helper functionality to set the elevator via sysfs. This
++ * requires /bin/echo and sysfs to be mounted which may not be true
++ * early in the boot process.
++ */
+ #ifdef HAVE_ELEVATOR_CHANGE
+ error = elevator_change(q, elevator);
+ #else
+- /*
+- * For pre-2.6.36 kernels elevator_change() is not available.
+- * Therefore we fall back to using a usermodehelper to echo the
+- * elevator into sysfs; This requires /bin/echo and sysfs to be
+- * mounted which may not be true early in the boot process.
+- */
+ #define SET_SCHEDULER_CMD \
+ "exec 0</dev/null " \
+ " 1>/sys/block/%s/queue/scheduler " \
+@@ -198,9 +195,10 @@ vdev_elevator_switch(vdev_t *v, char *elevator)
+ error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
+ strfree(argv[2]);
+ #endif /* HAVE_ELEVATOR_CHANGE */
+- if (error)
+- printk("ZFS: Unable to set \"%s\" scheduler for %s (%s): %d\n",
++ if (error) {
++ zfs_dbgmsg("Unable to set \"%s\" scheduler for %s (%s): %d\n",
+ elevator, v->vdev_path, device, error);
++ }
+ }
+
+ /*
diff --git a/zfs-patches/0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch b/zfs-patches/0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch
new file mode 100644
index 0000000..c1ec08a
--- /dev/null
+++ b/zfs-patches/0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch
@@ -0,0 +1,54 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Troels=20N=C3=B8rgaard?= <tnn at tradeshift.com>
+Date: Sat, 7 Jul 2018 01:15:19 +0200
+Subject: [PATCH] Default ashift for Amazon EC2 NVMe devices
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Add a default 4 KiB ashift for Amazon EC2 NVMe devices on instances with
+NVMe ephemeral devices, such as the types c5d, f1, i3 and m5d.
+As per the official documentation [1] a 4096 byte blocksize should be
+used to match the underlying hardware.
+
+The string was identified via:
+
+$ sudo sginfo -M /dev/nvme0n1
+INQUIRY response (cmd: 0x12)
+----------------------------
+Device Type 0
+Vendor: NVMe
+Product: Amazon EC2 NVMe
+Revision level:
+
+$ lsblk -io KNAME,TYPE,SIZE,MODEL
+KNAME TYPE SIZE MODEL
+nvme0n1 disk 442.4G Amazon EC2 NVMe Instance Storage
+
+[1] https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/
+ storage-optimized-instances.html
+ Retrieved 2018-07-03
+
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Reviewed-by: Giuseppe Di Natale <guss80 at gmail.com>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: Troels Nørgaard <tnn at tradeshift.com>
+Closes #7676
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ cmd/zpool/zpool_vdev.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/cmd/zpool/zpool_vdev.c b/cmd/zpool/zpool_vdev.c
+index fd6bd9e7..69ff7ff6 100644
+--- a/cmd/zpool/zpool_vdev.c
++++ b/cmd/zpool/zpool_vdev.c
+@@ -191,6 +191,7 @@ static vdev_disk_db_entry_t vdev_disk_database[] = {
+ {"ATA INTEL SSDSC2BP24", 4096},
+ {"ATA INTEL SSDSC2BP48", 4096},
+ {"NA SmrtStorSDLKAE9W", 4096},
++ {"NVMe Amazon EC2 NVMe ", 4096},
+ /* Imported from Open Solaris */
+ {"ATA MARVELL SD88SA02", 4096},
+ /* Advanced format Hard drives */
diff --git a/zfs-patches/0018-Fix-kernel-unaligned-access-on-sparc64.patch b/zfs-patches/0018-Fix-kernel-unaligned-access-on-sparc64.patch
new file mode 100644
index 0000000..f8870a6
--- /dev/null
+++ b/zfs-patches/0018-Fix-kernel-unaligned-access-on-sparc64.patch
@@ -0,0 +1,123 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1 at llnl.gov>
+Date: Wed, 11 Jul 2018 13:10:40 -0700
+Subject: [PATCH] Fix kernel unaligned access on sparc64
+
+Update the SA_COPY_DATA macro to check if the architecture supports
+efficient unaligned memory accesses at compile time. Otherwise
+fall back to using the sa_copy_data() function.
+
+The kernel provided CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is
+used to determine availability in kernel space. In user space
+the x86_64, x86, powerpc, and sometimes arm architectures will
+define the HAVE_EFFICIENT_UNALIGNED_ACCESS macro.
+
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Closes #7642
+Closes #7684
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ lib/libspl/include/sys/isa_defs.h | 7 +++++++
+ module/icp/algs/modes/ccm.c | 2 +-
+ module/zfs/sa.c | 35 ++++++++++++++++++++---------------
+ 3 files changed, 28 insertions(+), 16 deletions(-)
+
+diff --git a/lib/libspl/include/sys/isa_defs.h b/lib/libspl/include/sys/isa_defs.h
+index a5bea039..7a90e077 100644
+--- a/lib/libspl/include/sys/isa_defs.h
++++ b/lib/libspl/include/sys/isa_defs.h
+@@ -55,6 +55,7 @@ extern "C" {
+ #endif
+
+ #define _SUNOS_VTOC_16
++#define HAVE_EFFICIENT_UNALIGNED_ACCESS
+
+ /* i386 arch specific defines */
+ #elif defined(__i386) || defined(__i386__)
+@@ -76,6 +77,7 @@ extern "C" {
+ #endif
+
+ #define _SUNOS_VTOC_16
++#define HAVE_EFFICIENT_UNALIGNED_ACCESS
+
+ /* powerpc arch specific defines */
+ #elif defined(__powerpc) || defined(__powerpc__) || defined(__powerpc64__)
+@@ -99,6 +101,7 @@ extern "C" {
+ #endif
+
+ #define _SUNOS_VTOC_16
++#define HAVE_EFFICIENT_UNALIGNED_ACCESS
+
+ /* arm arch specific defines */
+ #elif defined(__arm) || defined(__arm__) || defined(__aarch64__)
+@@ -129,6 +132,10 @@ extern "C" {
+
+ #define _SUNOS_VTOC_16
+
++#if defined(__ARM_FEATURE_UNALIGNED)
++#define HAVE_EFFICIENT_UNALIGNED_ACCESS
++#endif
++
+ /* sparc arch specific defines */
+ #elif defined(__sparc) || defined(__sparc__)
+
+diff --git a/module/icp/algs/modes/ccm.c b/module/icp/algs/modes/ccm.c
+index 22aeb0a6..fb41194f 100644
+--- a/module/icp/algs/modes/ccm.c
++++ b/module/icp/algs/modes/ccm.c
+@@ -28,7 +28,7 @@
+ #include <sys/crypto/common.h>
+ #include <sys/crypto/impl.h>
+
+-#if defined(__i386) || defined(__amd64)
++#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS
+ #include <sys/byteorder.h>
+ #define UNALIGNED_POINTERS_PERMITTED
+ #endif
+diff --git a/module/zfs/sa.c b/module/zfs/sa.c
+index 8046dbde..1fb1a8b5 100644
+--- a/module/zfs/sa.c
++++ b/module/zfs/sa.c
+@@ -147,21 +147,26 @@ arc_byteswap_func_t sa_bswap_table[] = {
+ zfs_acl_byteswap,
+ };
+
+-#define SA_COPY_DATA(f, s, t, l) \
+- { \
+- if (f == NULL) { \
+- if (l == 8) { \
+- *(uint64_t *)t = *(uint64_t *)s; \
+- } else if (l == 16) { \
+- *(uint64_t *)t = *(uint64_t *)s; \
+- *(uint64_t *)((uintptr_t)t + 8) = \
+- *(uint64_t *)((uintptr_t)s + 8); \
+- } else { \
+- bcopy(s, t, l); \
+- } \
+- } else \
+- sa_copy_data(f, s, t, l); \
+- }
++#ifdef HAVE_EFFICIENT_UNALIGNED_ACCESS
++#define SA_COPY_DATA(f, s, t, l) \
++do { \
++ if (f == NULL) { \
++ if (l == 8) { \
++ *(uint64_t *)t = *(uint64_t *)s; \
++ } else if (l == 16) { \
++ *(uint64_t *)t = *(uint64_t *)s; \
++ *(uint64_t *)((uintptr_t)t + 8) = \
++ *(uint64_t *)((uintptr_t)s + 8); \
++ } else { \
++ bcopy(s, t, l); \
++ } \
++ } else { \
++ sa_copy_data(f, s, t, l); \
++ } \
++} while (0)
++#else
++#define SA_COPY_DATA(f, s, t, l) sa_copy_data(f, s, t, l)
++#endif
+
+ /*
+ * This table is fixed and cannot be changed. Its purpose is to
diff --git a/zfs-patches/0004-Fix-zpl_mount-deadlock.patch b/zfs-patches/0019-Fix-zpl_mount-deadlock.patch
similarity index 97%
rename from zfs-patches/0004-Fix-zpl_mount-deadlock.patch
rename to zfs-patches/0019-Fix-zpl_mount-deadlock.patch
index 8947309..93ee7ce 100644
--- a/zfs-patches/0004-Fix-zpl_mount-deadlock.patch
+++ b/zfs-patches/0019-Fix-zpl_mount-deadlock.patch
@@ -48,7 +48,6 @@ Closes #7659
Closes #7691
Closes #7693
-(Cherry-picked from ac09630d8b0bf6c92084a30fdaefd03fd0adbdc1)
Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
---
include/sys/zfs_vfsops.h | 1 +
diff --git a/zfs-patches/0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch b/zfs-patches/0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch
new file mode 100644
index 0000000..395554b
--- /dev/null
+++ b/zfs-patches/0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch
@@ -0,0 +1,133 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Toomas Soome <tsoome at me.com>
+Date: Wed, 1 Jun 2016 19:18:10 +0300
+Subject: [PATCH] OpenZFS 8906 - uts: illumos rootfs should support salted
+ cksum
+
+Porting notes:
+* As of grub-2.02 these checksums are not supported. However, as
+ pointed out in #6501 there are alternatives such as EFISTUB which
+ work and have no such restriction. A warning was added to the
+ checksum property section of the zfs.8 man page.
+
+Authored by: Toomas Soome <tsoome at me.com>
+Reviewed by: C Fraire <cfraire at me.com>
+Reviewed by: Robert Mustacchi <rm at joyent.com>
+Reviewed by: Yuri Pankov <yuripv at yuripv.net>
+Approved by: Dan McDonald <danmcd at joyent.com>
+Ported-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+
+OpenZFS-issue: https://illumos.org/issues/8906
+OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7dec52f
+Closes #6501
+Closes #7714
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ man/man5/zpool-features.5 | 18 +++++++-----------
+ man/man8/zfs.8 | 5 ++++-
+ module/zfs/zfs_ioctl.c | 11 +----------
+ 3 files changed, 12 insertions(+), 22 deletions(-)
+
+diff --git a/man/man5/zpool-features.5 b/man/man5/zpool-features.5
+index 78ea559f..140ce269 100644
+--- a/man/man5/zpool-features.5
++++ b/man/man5/zpool-features.5
+@@ -14,7 +14,7 @@
+ .\" CDDL HEADER, with the fields enclosed by brackets "[]" replaced with your
+ .\" own identifying information:
+ .\" Portions Copyright [yyyy] [name of copyright owner]
+-.TH ZPOOL-FEATURES 5 "Aug 27, 2013"
++.TH ZPOOL-FEATURES 5 "Jun 8, 2018"
+ .SH NAME
+ zpool\-features \- ZFS pool feature descriptions
+ .SH DESCRIPTION
+@@ -248,8 +248,9 @@ immediately activate the \fBlz4_compress\fR feature on the underlying
+ pool using the \fBzfs\fR(1M) command. Also, all newly written metadata
+ will be compressed with \fBlz4\fR algorithm. Since this feature is not
+ read-only compatible, this operation will render the pool unimportable
+-on systems without support for the \fBlz4_compress\fR feature. Booting
+-off of \fBlz4\fR-compressed root pools is supported.
++on systems without support for the \fBlz4_compress\fR feature.
++
++Booting off of \fBlz4\fR-compressed root pools is supported.
+
+ This feature becomes \fBactive\fR as soon as it is enabled and will
+ never return to being \fBenabled\fB.
+@@ -510,8 +511,7 @@ can turn on the \fBsha512\fR checksum on any dataset using the
+ and will return to being \fBenabled\fR once all filesystems that have
+ ever had their checksum set to \fBsha512\fR are destroyed.
+
+-Booting off of pools utilizing SHA-512/256 is supported (provided that
+-the updated GRUB stage2 module is installed).
++Booting off of pools utilizing SHA-512/256 is supported.
+
+ .RE
+
+@@ -545,9 +545,7 @@ can turn on the \fBskein\fR checksum on any dataset using the
+ and will return to being \fBenabled\fR once all filesystems that have
+ ever had their checksum set to \fBskein\fR are destroyed.
+
+-Booting off of pools using \fBskein\fR is \fBNOT\fR supported
+--- any attempt to enable \fBskein\fR on a root pool will fail with an
+-error.
++Booting off of pools using \fBskein\fR is supported.
+
+ .RE
+
+@@ -587,9 +585,7 @@ can turn on the \fBedonr\fR checksum on any dataset using the
+ and will return to being \fBenabled\fR once all filesystems that have
+ ever had their checksum set to \fBedonr\fR are destroyed.
+
+-Booting off of pools using \fBedonr\fR is \fBNOT\fR supported
+--- any attempt to enable \fBedonr\fR on a root pool will fail with an
+-error.
++Booting off of pools using \fBedonr\fR is supported.
+
+ .RE
+
+diff --git a/man/man8/zfs.8 b/man/man8/zfs.8
+index 48a5e6ea..bb3b46e3 100644
+--- a/man/man8/zfs.8
++++ b/man/man8/zfs.8
+@@ -29,7 +29,7 @@
+ .\" Copyright 2016 Nexenta Systems, Inc.
+ .\" Copyright 2016 Richard Laager. All rights reserved.
+ .\"
+-.Dd June 28, 2017
++.Dd July 13, 2018
+ .Dt ZFS 8 SMM
+ .Os Linux
+ .Sh NAME
+@@ -1049,6 +1049,9 @@ The
+ and
+ .Sy edonr
+ checksum algorithms require enabling the appropriate features on the pool.
++These algorithms are not supported by GRUB and should not be set on the
++.Sy bootfs
++filesystem when using GRUB to boot the system.
+ Please see
+ .Xr zpool-features 5
+ for more information on these algorithms.
+diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
+index f4f509a7..6516f646 100644
+--- a/module/zfs/zfs_ioctl.c
++++ b/module/zfs/zfs_ioctl.c
+@@ -3985,16 +3985,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
+
+ if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+ return (err);
+- /*
+- * Salted checksums are not supported on root pools.
+- */
+- if (spa_bootfs(spa) != 0 &&
+- intval < ZIO_CHECKSUM_FUNCTIONS &&
+- (zio_checksum_table[intval].ci_flags &
+- ZCHECKSUM_FLAG_SALTED)) {
+- spa_close(spa, FTAG);
+- return (SET_ERROR(ERANGE));
+- }
++
+ if (!spa_feature_is_enabled(spa, feature)) {
+ spa_close(spa, FTAG);
+ return (SET_ERROR(ENOTSUP));
diff --git a/zfs-patches/0021-Fix-zfs-incremental-send-remove-o-properties.patch b/zfs-patches/0021-Fix-zfs-incremental-send-remove-o-properties.patch
new file mode 100644
index 0000000..17c7d43
--- /dev/null
+++ b/zfs-patches/0021-Fix-zfs-incremental-send-remove-o-properties.patch
@@ -0,0 +1,108 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: LOLi <loli10K at users.noreply.github.com>
+Date: Tue, 1 May 2018 05:58:29 +0200
+Subject: [PATCH] Fix zfs incremental send remove '-o' properties
+
+When receiving an incremental send stream with intermediary snapshots
+zfs_receive_one() does not correctly identify the top-level dataset:
+consequently we restore said snapshots as if they were children
+datasets in the hierarchy, forcing inheritance of any property received
+with 'zfs send -o' and effectively removing any locally set value.
+
+The test case did not correctly verify this situation because it uses
+adjacent snapshots, basically testing 'zfs send -i' instead of
+'zfs send -I': this commit adds an additional intermediary snapshot to
+the test script.
+
+Reviewed-by: Paul Dagnelie <pcd at delphix.com>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: loli10K <ezomori.nozomu at gmail.com>
+Closes #7478
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ lib/libzfs/libzfs_sendrecv.c | 2 +-
+ .../zfs_receive/receive-o-x_props_override.ksh | 22 +++++++++++++---------
+ 2 files changed, 14 insertions(+), 10 deletions(-)
+
+diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
+index 5490581a..c5acd21a 100644
+--- a/lib/libzfs/libzfs_sendrecv.c
++++ b/lib/libzfs/libzfs_sendrecv.c
+@@ -3592,7 +3592,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
+ goto out;
+ }
+
+- if (top_zfs && *top_zfs == NULL)
++ if (top_zfs && (*top_zfs == NULL || strcmp(*top_zfs, name) == 0))
+ toplevel = B_TRUE;
+ if (drrb->drr_type == DMU_OST_ZVOL) {
+ type = ZFS_TYPE_VOLUME;
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
+index e4e69851..4e3a5393 100755
+--- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
++++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
+@@ -212,16 +212,17 @@ log_must eval "zfs send -R $orig@snap1 > $streamfile_repl"
+ log_must eval "zfs recv $dest < $streamfile_repl"
+ # Fill the datasets with properties and create an incremental replication stream
+ log_must zfs snapshot -r $orig@snap2
++log_must zfs snapshot -r $orig@snap3
+ log_must eval "zfs set copies=2 $orig"
+ log_must eval "zfs set '$userprop:orig'='$userval' $orig"
+ log_must eval "zfs set '$userprop:orig'='$userval' $origsub"
+ log_must eval "zfs set '$userprop:snap'='$userval' $orig@snap1"
+-log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap2"
+-log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr"
++log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap3"
++log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
+ # Sets various combination of override and exclude options
+ log_must eval "zfs recv -F -o atime=off -o '$userprop:dest2'='$userval' "\
+ "-o quota=123456789 -x compression -x '$userprop:orig' " \
+- "-x '$userprop:snap2' $dest < $streamfile_incr"
++ "-x '$userprop:snap3' $dest < $streamfile_incr"
+ # Verify we can correctly override and exclude properties
+ log_must eval "check_prop_source $dest copies 2 received"
+ log_must eval "check_prop_source $dest atime off local"
+@@ -237,9 +238,9 @@ log_must eval "check_prop_missing $destsub '$userprop:orig'"
+ log_must eval "check_prop_source " \
+ "$dest@snap1 '$userprop:snap' '$userval' received"
+ log_must eval "check_prop_source " \
+- "$destsub@snap2 '$userprop:snap' '$userval' received"
+-log_must eval "check_prop_missing $dest@snap2 '$userprop:snap2'"
+-log_must eval "check_prop_missing $destsub@snap2 '$userprop:snap2'"
++ "$destsub@snap3 '$userprop:snap' '$userval' received"
++log_must eval "check_prop_missing $dest@snap3 '$userprop:snap3'"
++log_must eval "check_prop_missing $destsub@snap3 '$userprop:snap3'"
+ # Cleanup
+ log_must zfs destroy -r -f $orig
+ log_must zfs destroy -r -f $dest
+@@ -270,7 +271,8 @@ log_must eval "zfs set compression=gzip $dest"
+ log_must eval "zfs set '$userprop:dest'='localval' $dest"
+ # Receive the new stream, verify we preserve locally set properties
+ log_must zfs snapshot -r $orig@snap2
+-log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr"
++log_must zfs snapshot -r $orig@snap3
++log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
+ log_must eval "zfs recv -F -x copies -x compression -x '$userprop:orig' " \
+ "-x '$userprop:dest' $dest < $streamfile_incr"
+ log_must eval "check_prop_source $dest '$userprop:dest' 'localval' local"
+@@ -305,7 +307,8 @@ log_must eval "check_prop_source $destsub quota 0 default"
+ log_must eval "zfs set quota=123456789 $dest"
+ log_must eval "zfs set canmount=off $destsub"
+ log_must zfs snapshot -r $orig@snap2
+-log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr"
++log_must zfs snapshot -r $orig@snap3
++log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
+ log_must eval "zfs recv -F -x quota -x canmount $dest < $streamfile_incr"
+ log_must eval "check_prop_source $dest quota 123456789 local"
+ log_must eval "check_prop_source $destsub quota 0 default"
+@@ -332,7 +335,8 @@ log_must eval "zfs set '$userprop:origsub'='$userval' $destsub"
+ mntpnt=$(get_prop mountpoint $orig)
+ log_must eval "dd if=/dev/urandom of=$mntpnt/file bs=1024k count=10"
+ log_must zfs snapshot -r $orig@snap2
+-log_must eval "zfs send -R -I $orig@snap1 $orig@snap2 > $streamfile_incr"
++log_must zfs snapshot -r $orig@snap3
++log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
+ log_must eval "dd if=$streamfile_incr of=$streamfile_trun bs=1024k count=9"
+ # Receive the truncated stream, verify original properties are kept
+ log_mustnot eval "zfs recv -F -o copies=3 -o quota=987654321 "\
diff --git a/zfs-patches/0022-Allow-inherited-properties-in-zfs_check_settable.patch b/zfs-patches/0022-Allow-inherited-properties-in-zfs_check_settable.patch
new file mode 100644
index 0000000..7e70804
--- /dev/null
+++ b/zfs-patches/0022-Allow-inherited-properties-in-zfs_check_settable.patch
@@ -0,0 +1,95 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: LOLi <loli10K at users.noreply.github.com>
+Date: Fri, 3 Aug 2018 23:56:25 +0200
+Subject: [PATCH] Allow inherited properties in zfs_check_settable()
+
+This change modifies how 'checksum' and 'dedup' properties are verified
+in zfs_check_settable() handling the case where they are explicitly
+inherited in the dataset hierarchy when receiving a recursive send
+stream.
+
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Reviewed-by: Tom Caputi <tcaputi at datto.com>
+Signed-off-by: loli10K <ezomori.nozomu at gmail.com>
+Closes #7755
+Closes #7576
+Closes #7757
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/zfs_ioctl.c | 26 +++++++++++-----------
+ .../zfs_receive/receive-o-x_props_override.ksh | 6 +++--
+ 2 files changed, 17 insertions(+), 15 deletions(-)
+
+diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
+index 6516f646..b8783e54 100644
+--- a/module/zfs/zfs_ioctl.c
++++ b/module/zfs/zfs_ioctl.c
+@@ -3967,7 +3967,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
+ {
+ spa_feature_t feature;
+ spa_t *spa;
+- uint64_t intval;
+ int err;
+
+ /* dedup feature version checks */
+@@ -3975,22 +3974,23 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
+ zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
+ return (SET_ERROR(ENOTSUP));
+
+- if (nvpair_value_uint64(pair, &intval) != 0)
+- return (SET_ERROR(EINVAL));
+-
+- /* check prop value is enabled in features */
+- feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
+- if (feature == SPA_FEATURE_NONE)
+- break;
++ if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
++ nvpair_value_uint64(pair, &intval) == 0) {
++ /* check prop value is enabled in features */
++ feature = zio_checksum_to_feature(
++ intval & ZIO_CHECKSUM_MASK);
++ if (feature == SPA_FEATURE_NONE)
++ break;
+
+- if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+- return (err);
++ if ((err = spa_open(dsname, &spa, FTAG)) != 0)
++ return (err);
+
+- if (!spa_feature_is_enabled(spa, feature)) {
++ if (!spa_feature_is_enabled(spa, feature)) {
++ spa_close(spa, FTAG);
++ return (SET_ERROR(ENOTSUP));
++ }
+ spa_close(spa, FTAG);
+- return (SET_ERROR(ENOTSUP));
+ }
+- spa_close(spa, FTAG);
+ break;
+ }
+
+diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
+index 4e3a5393..583d8eb1 100755
+--- a/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
++++ b/tests/zfs-tests/tests/functional/cli_root/zfs_receive/receive-o-x_props_override.ksh
+@@ -221,15 +221,17 @@ log_must eval "zfs set '$userprop:snap'='$userval' $origsub@snap3"
+ log_must eval "zfs send -R -I $orig@snap1 $orig@snap3 > $streamfile_incr"
+ # Sets various combination of override and exclude options
+ log_must eval "zfs recv -F -o atime=off -o '$userprop:dest2'='$userval' "\
+- "-o quota=123456789 -x compression -x '$userprop:orig' " \
+- "-x '$userprop:snap3' $dest < $streamfile_incr"
++ "-o quota=123456789 -o checksum=sha512 -x compression "\
++ "-x '$userprop:orig' -x '$userprop:snap3' $dest < $streamfile_incr"
+ # Verify we can correctly override and exclude properties
+ log_must eval "check_prop_source $dest copies 2 received"
+ log_must eval "check_prop_source $dest atime off local"
+ log_must eval "check_prop_source $dest '$userprop:dest2' '$userval' local"
+ log_must eval "check_prop_source $dest quota 123456789 local"
++log_must eval "check_prop_source $dest checksum sha512 local"
+ log_must eval "check_prop_inherit $destsub copies $dest"
+ log_must eval "check_prop_inherit $destsub atime $dest"
++log_must eval "check_prop_inherit $destsub checksum $dest"
+ log_must eval "check_prop_inherit $destsub '$userprop:dest2' $dest"
+ log_must eval "check_prop_source $destsub quota 0 default"
+ log_must eval "check_prop_source $destsub compression off default"
diff --git a/zfs-patches/0023-Fix-arcstat.py-handling-of-unsupported-options.patch b/zfs-patches/0023-Fix-arcstat.py-handling-of-unsupported-options.patch
new file mode 100644
index 0000000..f5e0832
--- /dev/null
+++ b/zfs-patches/0023-Fix-arcstat.py-handling-of-unsupported-options.patch
@@ -0,0 +1,33 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: LOLi <loli10K at users.noreply.github.com>
+Date: Sat, 18 Aug 2018 22:10:36 +0200
+Subject: [PATCH] Fix arcstat.py handling of unsupported options
+
+This change allows the arcstat.py script to handle unsupported options
+gracefully and print both error and usage messages when one such option
+is provided.
+
+Reviewed-by: Giuseppe Di Natale <guss80 at gmail.com>
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: loli10K <ezomori.nozomu at gmail.com>
+Closes #7799
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ cmd/arcstat/arcstat.py | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/cmd/arcstat/arcstat.py b/cmd/arcstat/arcstat.py
+index 85c83ccc..b52a8c29 100755
+--- a/cmd/arcstat/arcstat.py
++++ b/cmd/arcstat/arcstat.py
+@@ -285,7 +285,7 @@ def init():
+ ]
+ )
+ except getopt.error as msg:
+- sys.stderr.write(msg)
++ sys.stderr.write("Error: %s\n" % str(msg))
+ usage()
+ opts = None
+
diff --git a/zfs-patches/0024-Don-t-modify-argv-in-user-tools.patch b/zfs-patches/0024-Don-t-modify-argv-in-user-tools.patch
new file mode 100644
index 0000000..2162a70
--- /dev/null
+++ b/zfs-patches/0024-Don-t-modify-argv-in-user-tools.patch
@@ -0,0 +1,123 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: DeHackEd <DeHackEd at users.noreply.github.com>
+Date: Mon, 20 Aug 2018 12:55:18 -0400
+Subject: [PATCH] Don't modify argv[] in user tools
+
+argv[] gets modified during string parsing for input arguments. This
+is reflected in the live process listing. Don't do that.
+
+Reviewed-by: Serapheim Dimitropoulos <serapheim at delphix.com>
+Reviewed-by: loli10K <ezomori.nozomu at gmail.com>
+Reviewed-by: Giuseppe Di Natale <guss80 at gmail.com>
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: DHE <git at dehacked.net>
+Closes #7760
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ cmd/zfs/zfs_main.c | 18 ++++++++++++++++--
+ cmd/zpool/zpool_main.c | 18 ++++++++++++++++--
+ 2 files changed, 32 insertions(+), 4 deletions(-)
+
+diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c
+index f57df858..275d9c89 100644
+--- a/cmd/zfs/zfs_main.c
++++ b/cmd/zfs/zfs_main.c
+@@ -7041,6 +7041,7 @@ main(int argc, char **argv)
+ int ret = 0;
+ int i = 0;
+ char *cmdname;
++ char **newargv;
+
+ (void) setlocale(LC_ALL, "");
+ (void) textdomain(TEXT_DOMAIN);
+@@ -7096,16 +7097,25 @@ main(int argc, char **argv)
+ libzfs_print_on_error(g_zfs, B_TRUE);
+
+ /*
++ * Many commands modify input strings for string parsing reasons.
++ * We create a copy to protect the original argv.
++ */
++ newargv = malloc((argc + 1) * sizeof (newargv[0]));
++ for (i = 0; i < argc; i++)
++ newargv[i] = strdup(argv[i]);
++ newargv[argc] = NULL;
++
++ /*
+ * Run the appropriate command.
+ */
+ libzfs_mnttab_cache(g_zfs, B_TRUE);
+ if (find_command_idx(cmdname, &i) == 0) {
+ current_command = &command_table[i];
+- ret = command_table[i].func(argc - 1, argv + 1);
++ ret = command_table[i].func(argc - 1, newargv + 1);
+ } else if (strchr(cmdname, '=') != NULL) {
+ verify(find_command_idx("set", &i) == 0);
+ current_command = &command_table[i];
+- ret = command_table[i].func(argc, argv);
++ ret = command_table[i].func(argc, newargv);
+ } else {
+ (void) fprintf(stderr, gettext("unrecognized "
+ "command '%s'\n"), cmdname);
+@@ -7113,6 +7123,10 @@ main(int argc, char **argv)
+ ret = 1;
+ }
+
++ for (i = 0; i < argc; i++)
++ free(newargv[i]);
++ free(newargv);
++
+ if (ret == 0 && log_history)
+ (void) zpool_log_history(g_zfs, history_str);
+
+diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
+index 97697011..a4fd0321 100644
+--- a/cmd/zpool/zpool_main.c
++++ b/cmd/zpool/zpool_main.c
+@@ -7971,6 +7971,7 @@ main(int argc, char **argv)
+ int ret = 0;
+ int i = 0;
+ char *cmdname;
++ char **newargv;
+
+ (void) setlocale(LC_ALL, "");
+ (void) textdomain(TEXT_DOMAIN);
+@@ -8006,15 +8007,24 @@ main(int argc, char **argv)
+ zfs_save_arguments(argc, argv, history_str, sizeof (history_str));
+
+ /*
++ * Many commands modify input strings for string parsing reasons.
++ * We create a copy to protect the original argv.
++ */
++ newargv = malloc((argc + 1) * sizeof (newargv[0]));
++ for (i = 0; i < argc; i++)
++ newargv[i] = strdup(argv[i]);
++ newargv[argc] = NULL;
++
++ /*
+ * Run the appropriate command.
+ */
+ if (find_command_idx(cmdname, &i) == 0) {
+ current_command = &command_table[i];
+- ret = command_table[i].func(argc - 1, argv + 1);
++ ret = command_table[i].func(argc - 1, newargv + 1);
+ } else if (strchr(cmdname, '=')) {
+ verify(find_command_idx("set", &i) == 0);
+ current_command = &command_table[i];
+- ret = command_table[i].func(argc, argv);
++ ret = command_table[i].func(argc, newargv);
+ } else if (strcmp(cmdname, "freeze") == 0 && argc == 3) {
+ /*
+ * 'freeze' is a vile debugging abomination, so we treat
+@@ -8031,6 +8041,10 @@ main(int argc, char **argv)
+ ret = 1;
+ }
+
++ for (i = 0; i < argc; i++)
++ free(newargv[i]);
++ free(newargv);
++
+ if (ret == 0 && log_history)
+ (void) zpool_log_history(g_zfs, history_str);
+
diff --git a/zfs-patches/0025-Add-missing-zfs-dracut-RPM-dependencies.patch b/zfs-patches/0025-Add-missing-zfs-dracut-RPM-dependencies.patch
new file mode 100644
index 0000000..fbcc19a
--- /dev/null
+++ b/zfs-patches/0025-Add-missing-zfs-dracut-RPM-dependencies.patch
@@ -0,0 +1,42 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1 at llnl.gov>
+Date: Tue, 31 Jul 2018 10:17:44 -0700
+Subject: [PATCH] Add missing zfs-dracut RPM dependencies
+
+The zfs-dracut package requires the hostid, basename, head, awk,
+and grep utilities be installed. The first three are provided by
+coreutils but additional dependencies are required for awk and grep.
+
+Reviewed-by: Manuel Amador (Rudd-O) <rudd-o at rudd-o.com>
+Reviewed-by: Tony Hutter <hutter2 at llnl.gov>
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Closes #7729
+Closes #7747
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ rpm/generic/zfs.spec.in | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
+index 5b89db02..398221c6 100644
+--- a/rpm/generic/zfs.spec.in
++++ b/rpm/generic/zfs.spec.in
+@@ -196,7 +196,7 @@ Requires: acl
+ Requires: sudo
+ Requires: sysstat
+ Requires: rng-tools
+-Requires: libaio
++Requires: libaio
+ AutoReqProv: no
+
+ %description test
+@@ -208,6 +208,8 @@ Summary: Dracut module
+ Group: System Environment/Kernel
+ Requires: %{name}%{?_isa} = %{version}-%{release}
+ Requires: dracut
++Requires: /usr/bin/awk
++Requires: grep
+
+ %description dracut
+ This package contains a dracut module used to construct an initramfs
diff --git a/zfs-patches/0026-Add-libaio-devel-BuildRequires.patch b/zfs-patches/0026-Add-libaio-devel-BuildRequires.patch
new file mode 100644
index 0000000..a68148a
--- /dev/null
+++ b/zfs-patches/0026-Add-libaio-devel-BuildRequires.patch
@@ -0,0 +1,31 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Brian Behlendorf <behlendorf1 at llnl.gov>
+Date: Thu, 23 Aug 2018 09:34:34 -0700
+Subject: [PATCH] Add libaio-devel BuildRequires
+
+The zfs-test package needs a build requirement on the libaio-devel
+package. Without it ./configure will correctly determine that
+mmap_libaio cannot be built and it will be skipped.
+
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Closes #7821
+Closes #7824
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ rpm/generic/zfs.spec.in | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
+index 398221c6..16c5780b 100644
+--- a/rpm/generic/zfs.spec.in
++++ b/rpm/generic/zfs.spec.in
+@@ -197,6 +197,7 @@ Requires: sudo
+ Requires: sysstat
+ Requires: rng-tools
+ Requires: libaio
++BuildRequires: libaio-devel
+ AutoReqProv: no
+
+ %description test
diff --git a/zfs-patches/0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch b/zfs-patches/0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch
new file mode 100644
index 0000000..88e0c37
--- /dev/null
+++ b/zfs-patches/0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch
@@ -0,0 +1,36 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: LOLi <loli10K at users.noreply.github.com>
+Date: Sun, 26 Aug 2018 21:43:27 +0200
+Subject: [PATCH] Fix libaio-devel requirement for Debian-based distributions
+
+BuildRequires tags for "-devel" packages in the RPM spec file do not
+work when building on Debian-based distributions.
+
+Fix this issue by making this requirement conditional to RPM-based
+distributions.
+
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: loli10K <ezomori.nozomu at gmail.com>
+Closes #7829
+Closes #7831
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ rpm/generic/zfs.spec.in | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
+index 16c5780b..22565725 100644
+--- a/rpm/generic/zfs.spec.in
++++ b/rpm/generic/zfs.spec.in
+@@ -197,7 +197,9 @@ Requires: sudo
+ Requires: sysstat
+ Requires: rng-tools
+ Requires: libaio
++%if 0%{?rhel}%{?fedora}%{?suse_version}
+ BuildRequires: libaio-devel
++%endif
+ AutoReqProv: no
+
+ %description test
diff --git a/zfs-patches/0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch b/zfs-patches/0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch
new file mode 100644
index 0000000..e49a99b
--- /dev/null
+++ b/zfs-patches/0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch
@@ -0,0 +1,61 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Joao Carlos Mendes Luis <dioni21 at users.noreply.github.com>
+Date: Sun, 26 Aug 2018 16:55:44 -0300
+Subject: [PATCH] Fedora 28: Fix misc bounds check compiler warnings
+
+Fix a bunch of truncation compiler warnings that show up
+on Fedora 28 (GCC 8.0.1).
+
+Reviewed-by: Giuseppe Di Natale <guss80 at gmail.com>
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Issue #7368
+Closes #7826
+Closes #7830
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ lib/libshare/smb.c | 2 +-
+ module/icp/core/kcf_mech_tabs.c | 2 +-
+ tests/zfs-tests/tests/functional/ctime/ctime.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/lib/libshare/smb.c b/lib/libshare/smb.c
+index 76145bd9..91d4decb 100644
+--- a/lib/libshare/smb.c
++++ b/lib/libshare/smb.c
+@@ -218,7 +218,7 @@ smb_enable_share_one(const char *sharename, const char *sharepath)
+ int rc;
+
+ /* Support ZFS share name regexp '[[:alnum:]_-.: ]' */
+- strncpy(name, sharename, sizeof (name));
++ strlcpy(name, sharename, sizeof (name));
+ name [sizeof (name)-1] = '\0';
+
+ pos = name;
+diff --git a/module/icp/core/kcf_mech_tabs.c b/module/icp/core/kcf_mech_tabs.c
+index 723bfdb6..741dae7a 100644
+--- a/module/icp/core/kcf_mech_tabs.c
++++ b/module/icp/core/kcf_mech_tabs.c
+@@ -321,7 +321,7 @@ kcf_create_mech_entry(kcf_ops_class_t class, char *mechname)
+ mutex_enter(&(me_tab[i].me_mutex));
+ if (me_tab[i].me_name[0] == 0) {
+ /* Found an empty spot */
+- (void) strncpy(me_tab[i].me_name, mechname,
++ (void) strlcpy(me_tab[i].me_name, mechname,
+ CRYPTO_MAX_MECH_NAME);
+ me_tab[i].me_name[CRYPTO_MAX_MECH_NAME-1] = '\0';
+ me_tab[i].me_mechid = KCF_MECHID(class, i);
+diff --git a/tests/zfs-tests/tests/functional/ctime/ctime.c b/tests/zfs-tests/tests/functional/ctime/ctime.c
+index ba8af15f..1cd18323 100644
+--- a/tests/zfs-tests/tests/functional/ctime/ctime.c
++++ b/tests/zfs-tests/tests/functional/ctime/ctime.c
+@@ -155,7 +155,7 @@ do_link(const char *pfile)
+ return (-1);
+ }
+
+- strncpy(pfile_copy, pfile, sizeof (pfile_copy));
++ strncpy(pfile_copy, pfile, sizeof (pfile_copy)-1);
+ pfile_copy[sizeof (pfile_copy) - 1] = '\0';
+ /*
+ * Figure out source file directory name, and create
diff --git a/zfs-patches/0029-Fix-problems-receiving-reallocated-dnodes.patch b/zfs-patches/0029-Fix-problems-receiving-reallocated-dnodes.patch
new file mode 100644
index 0000000..a504099
--- /dev/null
+++ b/zfs-patches/0029-Fix-problems-receiving-reallocated-dnodes.patch
@@ -0,0 +1,556 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tim Chase <tim at chase2k.com>
+Date: Mon, 27 Aug 2018 10:28:32 -0400
+Subject: [PATCH] Fix problems receiving reallocated dnodes
+
+This is a port of 047116ac - Raw sends must be able to decrease nlevels,
+to the zfs-0.7-stable branch. It includes the various fixes to the
+problem of receiving incremental streams which include reallocated dnodes
+in which the number of dnode slots has changed but excludes the parts
+which are related to raw streams.
+
+From 047116ac:
+
+ Currently, when a raw zfs send file includes a
+ DRR_OBJECT record that would decrease the number of
+ levels of an existing object, the object is reallocated
+ with dmu_object_reclaim() which creates the new dnode
+ using the old object's nlevels. For non-raw sends this
+ doesn't really matter, but raw sends require that
+ nlevels on the receive side match that of the send
+ side so that the checksum-of-MAC tree can be properly
+ maintained. This patch corrects the issue by freeing
+ the object completely before allocating it again in
+ this case.
+
+ This patch also corrects several issues with
+ dnode_hold_impl() and related functions that prevented
+ dnodes (particularly multi-slot dnodes) from being
+ reallocated properly due to the fact that existing
+ dnodes were not being fully cleaned up when they
+ were freed.
+
+ This patch adds a test to make sure that zfs recv
+ functions properly with incremental streams containing
+ dnodes of different sizes.
+
+This also includes a one-liner fix from loli10K to fix a test failure:
+https://github.com/zfsonlinux/zfs/pull/7792#discussion_r212769264
+
+Authored-by: Tom Caputi <tcaputi at datto.com>
+Reviewed-by: Matthew Ahrens <mahrens at delphix.com>
+Reviewed-by: Jorgen Lundman <lundman at lundman.net>
+Signed-off-by: Tom Caputi <tcaputi at datto.com>
+Signed-off-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: Tim Chase <tim at chase2k.com>
+Ported-by: Tim Chase <tim at chase2k.com>
+
+Closes #6821
+Closes #6864
+
+NOTE: This is the first of the port of 3 related patches to the
+zfs-0.7-release branch of ZoL. The other two patches should immediately
+follow this one.
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ cmd/ztest/ztest.c | 25 +++++-
+ include/sys/dnode.h | 6 ++
+ lib/libzfs/libzfs_sendrecv.c | 1 +
+ module/zfs/dmu_object.c | 1 -
+ module/zfs/dmu_send.c | 51 +++++++++--
+ module/zfs/dnode.c | 84 +++++++++++++++++--
+ module/zfs/dnode_sync.c | 2 +
+ tests/runfiles/linux.run | 2 +-
+ tests/zfs-tests/tests/functional/rsend/Makefile.am | 3 +-
+ .../functional/rsend/send_realloc_dnode_size.ksh | 98 ++++++++++++++++++++++
+ 10 files changed, 258 insertions(+), 15 deletions(-)
+ create mode 100644 tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
+
+diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c
+index 1a320b03..a410eeef 100644
+--- a/cmd/ztest/ztest.c
++++ b/cmd/ztest/ztest.c
+@@ -197,7 +197,8 @@ extern uint64_t metaslab_gang_bang;
+ extern uint64_t metaslab_df_alloc_threshold;
+ extern int metaslab_preload_limit;
+ extern boolean_t zfs_compressed_arc_enabled;
+-extern int zfs_abd_scatter_enabled;
++extern int zfs_abd_scatter_enabled;
++extern int dmu_object_alloc_chunk_shift;
+
+ static ztest_shared_opts_t *ztest_shared_opts;
+ static ztest_shared_opts_t ztest_opts;
+@@ -310,6 +311,7 @@ static ztest_shared_callstate_t *ztest_shared_callstate;
+ ztest_func_t ztest_dmu_read_write;
+ ztest_func_t ztest_dmu_write_parallel;
+ ztest_func_t ztest_dmu_object_alloc_free;
++ztest_func_t ztest_dmu_object_next_chunk;
+ ztest_func_t ztest_dmu_commit_callbacks;
+ ztest_func_t ztest_zap;
+ ztest_func_t ztest_zap_parallel;
+@@ -357,6 +359,7 @@ ztest_info_t ztest_info[] = {
+ ZTI_INIT(ztest_dmu_read_write, 1, &zopt_always),
+ ZTI_INIT(ztest_dmu_write_parallel, 10, &zopt_always),
+ ZTI_INIT(ztest_dmu_object_alloc_free, 1, &zopt_always),
++ ZTI_INIT(ztest_dmu_object_next_chunk, 1, &zopt_sometimes),
+ ZTI_INIT(ztest_dmu_commit_callbacks, 1, &zopt_always),
+ ZTI_INIT(ztest_zap, 30, &zopt_always),
+ ZTI_INIT(ztest_zap_parallel, 100, &zopt_always),
+@@ -3927,6 +3930,26 @@ ztest_dmu_object_alloc_free(ztest_ds_t *zd, uint64_t id)
+ umem_free(od, size);
+ }
+
++/*
++ * Rewind the global allocator to verify object allocation backfilling.
++ */
++void
++ztest_dmu_object_next_chunk(ztest_ds_t *zd, uint64_t id)
++{
++ objset_t *os = zd->zd_os;
++ int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
++ uint64_t object;
++
++ /*
++ * Rewind the global allocator randomly back to a lower object number
++ * to force backfilling and reclamation of recently freed dnodes.
++ */
++ mutex_enter(&os->os_obj_lock);
++ object = ztest_random(os->os_obj_next_chunk);
++ os->os_obj_next_chunk = P2ALIGN(object, dnodes_per_chunk);
++ mutex_exit(&os->os_obj_lock);
++}
++
+ #undef OD_ARRAY_SIZE
+ #define OD_ARRAY_SIZE 2
+
+diff --git a/include/sys/dnode.h b/include/sys/dnode.h
+index c7efe559..ea7defe1 100644
+--- a/include/sys/dnode.h
++++ b/include/sys/dnode.h
+@@ -360,6 +360,7 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
+ int minlvl, uint64_t blkfill, uint64_t txg);
+ void dnode_evict_dbufs(dnode_t *dn);
+ void dnode_evict_bonus(dnode_t *dn);
++void dnode_free_interior_slots(dnode_t *dn);
+
+ #define DNODE_IS_CACHEABLE(_dn) \
+ ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
+@@ -454,6 +455,11 @@ typedef struct dnode_stats {
+ */
+ kstat_named_t dnode_hold_free_txg;
+ /*
++ * Number of times dnode_free_interior_slots() needed to retry
++ * acquiring a slot zrl lock due to contention.
++ */
++ kstat_named_t dnode_free_interior_lock_retry;
++ /*
+ * Number of new dnodes allocated by dnode_allocate().
+ */
+ kstat_named_t dnode_allocate;
+diff --git a/lib/libzfs/libzfs_sendrecv.c b/lib/libzfs/libzfs_sendrecv.c
+index c5acd21a..cadf16cc 100644
+--- a/lib/libzfs/libzfs_sendrecv.c
++++ b/lib/libzfs/libzfs_sendrecv.c
+@@ -3577,6 +3577,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
+ }
+
+ newfs = B_TRUE;
++ *cp = '/';
+ }
+
+ if (flags->verbose) {
+diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
+index e7412b75..f53da407 100644
+--- a/module/zfs/dmu_object.c
++++ b/module/zfs/dmu_object.c
+@@ -275,7 +275,6 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+ return (err);
+ }
+
+-
+ int
+ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
+ {
+diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
+index cdbc1cd1..148b5ff8 100644
+--- a/module/zfs/dmu_send.c
++++ b/module/zfs/dmu_send.c
+@@ -2156,10 +2156,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ }
+
+ err = dmu_object_info(rwa->os, drro->drr_object, &doi);
+-
+- if (err != 0 && err != ENOENT)
++ if (err != 0 && err != ENOENT && err != EEXIST)
+ return (SET_ERROR(EINVAL));
+- object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
+
+ if (drro->drr_object > rwa->max_object)
+ rwa->max_object = drro->drr_object;
+@@ -2175,13 +2173,56 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ nblkptr = deduce_nblkptr(drro->drr_bonustype,
+ drro->drr_bonuslen);
+
++ object = drro->drr_object;
++
+ if (drro->drr_blksz != doi.doi_data_block_size ||
+- nblkptr < doi.doi_nblkptr) {
++ nblkptr < doi.doi_nblkptr ||
++ drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
+ 0, DMU_OBJECT_END);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+ }
++ } else if (err == EEXIST) {
++ /*
++ * The object requested is currently an interior slot of a
++ * multi-slot dnode. This will be resolved when the next txg
++ * is synced out, since the send stream will have told us
++ * to free this slot when we freed the associated dnode
++ * earlier in the stream.
++ */
++ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
++ object = drro->drr_object;
++ } else {
++ /* object is free and we are about to allocate a new one */
++ object = DMU_NEW_OBJECT;
++ }
++
++ /*
++ * If this is a multi-slot dnode there is a chance that this
++ * object will expand into a slot that is already used by
++ * another object from the previous snapshot. We must free
++ * these objects before we attempt to allocate the new dnode.
++ */
++ if (drro->drr_dn_slots > 1) {
++ for (uint64_t slot = drro->drr_object + 1;
++ slot < drro->drr_object + drro->drr_dn_slots;
++ slot++) {
++ dmu_object_info_t slot_doi;
++
++ err = dmu_object_info(rwa->os, slot, &slot_doi);
++ if (err == ENOENT || err == EEXIST)
++ continue;
++ else if (err != 0)
++ return (err);
++
++ err = dmu_free_long_object(rwa->os, slot);
++
++ if (err != 0)
++ return (err);
++ }
++
++ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
+ }
+
+ tx = dmu_tx_create(rwa->os);
+@@ -2732,7 +2773,7 @@ receive_read_record(struct receive_arg *ra)
+ * See receive_read_prefetch for an explanation why we're
+ * storing this object in the ignore_obj_list.
+ */
+- if (err == ENOENT ||
++ if (err == ENOENT || err == EEXIST ||
+ (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
+ objlist_insert(&ra->ignore_objlist, drro->drr_object);
+ err = 0;
+diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
+index e05a4d0a..df6a4872 100644
+--- a/module/zfs/dnode.c
++++ b/module/zfs/dnode.c
+@@ -55,6 +55,7 @@ dnode_stats_t dnode_stats = {
+ { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_txg", KSTAT_DATA_UINT64 },
++ { "dnode_free_interior_lock_retry", KSTAT_DATA_UINT64 },
+ { "dnode_allocate", KSTAT_DATA_UINT64 },
+ { "dnode_reallocate", KSTAT_DATA_UINT64 },
+ { "dnode_buf_evict", KSTAT_DATA_UINT64 },
+@@ -516,7 +517,8 @@ dnode_destroy(dnode_t *dn)
+ mutex_exit(&os->os_lock);
+
+ /* the dnode can no longer move, so we can release the handle */
+- zrl_remove(&dn->dn_handle->dnh_zrlock);
++ if (!zrl_is_locked(&dn->dn_handle->dnh_zrlock))
++ zrl_remove(&dn->dn_handle->dnh_zrlock);
+
+ dn->dn_allocated_txg = 0;
+ dn->dn_free_txg = 0;
+@@ -662,6 +664,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
+ DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
+
+ dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
++
++ dnode_free_interior_slots(dn);
+ DNODE_STAT_BUMP(dnode_reallocate);
+
+ /* clean up any unreferenced dbufs */
+@@ -1062,19 +1066,73 @@ dnode_set_slots(dnode_children_t *children, int idx, int slots, void *ptr)
+ }
+
+ static boolean_t
+-dnode_check_slots(dnode_children_t *children, int idx, int slots, void *ptr)
++dnode_check_slots_free(dnode_children_t *children, int idx, int slots)
+ {
+ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+ for (int i = idx; i < idx + slots; i++) {
+ dnode_handle_t *dnh = &children->dnc_children[i];
+- if (dnh->dnh_dnode != ptr)
++ dnode_t *dn = dnh->dnh_dnode;
++
++ if (dn == DN_SLOT_FREE) {
++ continue;
++ } else if (DN_SLOT_IS_PTR(dn)) {
++ mutex_enter(&dn->dn_mtx);
++ dmu_object_type_t type = dn->dn_type;
++ mutex_exit(&dn->dn_mtx);
++
++ if (type != DMU_OT_NONE)
++ return (B_FALSE);
++
++ continue;
++ } else {
+ return (B_FALSE);
++ }
++
++ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+ }
+
++static void
++dnode_reclaim_slots(dnode_children_t *children, int idx, int slots)
++{
++ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
++
++ for (int i = idx; i < idx + slots; i++) {
++ dnode_handle_t *dnh = &children->dnc_children[i];
++
++ ASSERT(zrl_is_locked(&dnh->dnh_zrlock));
++
++ if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
++ ASSERT3S(dnh->dnh_dnode->dn_type, ==, DMU_OT_NONE);
++ dnode_destroy(dnh->dnh_dnode);
++ dnh->dnh_dnode = DN_SLOT_FREE;
++ }
++ }
++}
++
++void
++dnode_free_interior_slots(dnode_t *dn)
++{
++ dnode_children_t *children = dmu_buf_get_user(&dn->dn_dbuf->db);
++ int epb = dn->dn_dbuf->db.db_size >> DNODE_SHIFT;
++ int idx = (dn->dn_object & (epb - 1)) + 1;
++ int slots = dn->dn_num_slots - 1;
++
++ if (slots == 0)
++ return;
++
++ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
++
++ while (!dnode_slots_tryenter(children, idx, slots))
++ DNODE_STAT_BUMP(dnode_free_interior_lock_retry);
++
++ dnode_set_slots(children, idx, slots, DN_SLOT_FREE);
++ dnode_slots_rele(children, idx, slots);
++}
++
+ void
+ dnode_special_close(dnode_handle_t *dnh)
+ {
+@@ -1355,7 +1413,7 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
+ while (dn == DN_SLOT_UNINIT) {
+ dnode_slots_hold(dnc, idx, slots);
+
+- if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
++ if (!dnode_check_slots_free(dnc, idx, slots)) {
+ DNODE_STAT_BUMP(dnode_hold_free_misses);
+ dnode_slots_rele(dnc, idx, slots);
+ dbuf_rele(db, FTAG);
+@@ -1368,15 +1426,29 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
+ continue;
+ }
+
+- if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
++ if (!dnode_check_slots_free(dnc, idx, slots)) {
+ DNODE_STAT_BUMP(dnode_hold_free_lock_misses);
+ dnode_slots_rele(dnc, idx, slots);
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(ENOSPC));
+ }
+
++ /*
++ * Allocated but otherwise free dnodes which would
++ * be in the interior of a multi-slot dnodes need
++ * to be freed. Single slot dnodes can be safely
++ * re-purposed as a performance optimization.
++ */
++ if (slots > 1)
++ dnode_reclaim_slots(dnc, idx + 1, slots - 1);
++
+ dnh = &dnc->dnc_children[idx];
+- dn = dnode_create(os, dn_block + idx, db, object, dnh);
++ if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
++ dn = dnh->dnh_dnode;
++ } else {
++ dn = dnode_create(os, dn_block + idx, db,
++ object, dnh);
++ }
+ }
+
+ mutex_enter(&dn->dn_mtx);
+diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c
+index 742d962b..8d65e385 100644
+--- a/module/zfs/dnode_sync.c
++++ b/module/zfs/dnode_sync.c
+@@ -533,6 +533,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
+ if (dn->dn_allocated_txg != dn->dn_free_txg)
+ dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
+ bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots);
++ dnode_free_interior_slots(dn);
+
+ mutex_enter(&dn->dn_mtx);
+ dn->dn_type = DMU_OT_NONE;
+@@ -540,6 +541,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
+ dn->dn_allocated_txg = 0;
+ dn->dn_free_txg = 0;
+ dn->dn_have_spill = B_FALSE;
++ dn->dn_num_slots = 1;
+ mutex_exit(&dn->dn_mtx);
+
+ ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
+diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run
+index 69e9eb26..d8fe6f3a 100644
+--- a/tests/runfiles/linux.run
++++ b/tests/runfiles/linux.run
+@@ -605,7 +605,7 @@ tests = ['rsend_001_pos', 'rsend_002_pos', 'rsend_003_pos', 'rsend_004_pos',
+ 'send-c_lz4_disabled', 'send-c_recv_lz4_disabled',
+ 'send-c_mixed_compression', 'send-c_stream_size_estimate', 'send-cD',
+ 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
+- 'send-c_recv_dedup', 'send_freeobjects']
++ 'send-c_recv_dedup', 'send_freeobjects', 'send_realloc_dnode_size']
+ tags = ['functional', 'rsend']
+
+ [tests/functional/scrub_mirror]
+diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am
+index 6b1aa8b3..a2837d1a 100644
+--- a/tests/zfs-tests/tests/functional/rsend/Makefile.am
++++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am
+@@ -36,7 +36,8 @@ dist_pkgdata_SCRIPTS = \
+ send-c_volume.ksh \
+ send-c_zstreamdump.ksh \
+ send-cpL_varied_recsize.ksh \
+- send_freeobjects.ksh
++ send_freeobjects.ksh \
++ send_realloc_dnode_size.ksh
+
+ dist_pkgdata_DATA = \
+ rsend.cfg \
+diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
+new file mode 100644
+index 00000000..20676394
+--- /dev/null
++++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
+@@ -0,0 +1,98 @@
++#!/bin/ksh
++
++#
++# This file and its contents are supplied under the terms of the
++# Common Development and Distribution License ("CDDL"), version 1.0.
++# You may only use this file in accordance with the terms of version
++# 1.0 of the CDDL.
++#
++# A full copy of the text of the CDDL should have accompanied this
++# source. A copy of the CDDL is also available via the Internet at
++# http://www.illumos.org/license/CDDL.
++#
++
++#
++# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
++#
++
++. $STF_SUITE/include/libtest.shlib
++. $STF_SUITE/tests/functional/rsend/rsend.kshlib
++
++#
++# Description:
++# Verify incremental receive properly handles objects with changed
++# dnode slot count.
++#
++# Strategy:
++# 1. Populate a dataset with 1k byte dnodes and snapshot
++# 2. Remove objects, set dnodesize=legacy, and remount dataset so new objects
++# get recycled numbers and formerly "interior" dnode slots get assigned
++# to new objects
++# 3. Remove objects, set dnodesize=2k, and remount dataset so new objects
++# overlap with recently recycled and formerly "normal" dnode slots get
++# assigned to new objects
++# 4. Generate initial and incremental streams
++# 5. Verify initial and incremental streams can be received
++#
++
++verify_runnable "both"
++
++log_assert "Verify incremental receive handles objects with changed dnode size"
++
++function cleanup
++{
++ rm -f $BACKDIR/fs-dn-legacy
++ rm -f $BACKDIR/fs-dn-1k
++ rm -f $BACKDIR/fs-dn-2k
++
++ if datasetexists $POOL/fs ; then
++ log_must zfs destroy -rR $POOL/fs
++ fi
++
++ if datasetexists $POOL/newfs ; then
++ log_must zfs destroy -rR $POOL/newfs
++ fi
++}
++
++log_onexit cleanup
++
++# 1. Populate a dataset with 1k byte dnodes and snapshot
++log_must zfs create -o dnodesize=1k $POOL/fs
++log_must mk_files 200 262144 0 $POOL/fs
++log_must zfs snapshot $POOL/fs@a
++
++# 2. Remove objects, set dnodesize=legacy, and remount dataset so new objects
++# get recycled numbers and formerly "interior" dnode slots get assigned
++# to new objects
++rm /$POOL/fs/*
++
++log_must zfs unmount $POOL/fs
++log_must zfs set dnodesize=legacy $POOL/fs
++log_must zfs mount $POOL/fs
++
++log_must mk_files 200 262144 0 $POOL/fs
++log_must zfs snapshot $POOL/fs@b
++
++# 3. Remove objects, set dnodesize=2k, and remount dataset so new objects
++# overlap with recently recycled and formerly "normal" dnode slots get
++# assigned to new objects
++rm /$POOL/fs/*
++
++log_must zfs unmount $POOL/fs
++log_must zfs set dnodesize=2k $POOL/fs
++log_must zfs mount $POOL/fs
++
++mk_files 200 262144 0 $POOL/fs
++log_must zfs snapshot $POOL/fs@c
++
++# 4. Generate initial and incremental streams
++log_must eval "zfs send $POOL/fs@a > $BACKDIR/fs-dn-1k"
++log_must eval "zfs send -i $POOL/fs@a $POOL/fs@b > $BACKDIR/fs-dn-legacy"
++log_must eval "zfs send -i $POOL/fs@b $POOL/fs@c > $BACKDIR/fs-dn-2k"
++
++# 5. Verify initial and incremental streams can be received
++log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-1k"
++log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-legacy"
++log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-2k"
++
++log_pass "Verify incremental receive handles objects with changed dnode size"
diff --git a/zfs-patches/0030-Fix-object-reclaim-when-using-large-dnodes.patch b/zfs-patches/0030-Fix-object-reclaim-when-using-large-dnodes.patch
new file mode 100644
index 0000000..fd5abd1
--- /dev/null
+++ b/zfs-patches/0030-Fix-object-reclaim-when-using-large-dnodes.patch
@@ -0,0 +1,134 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Caputi <tcaputi at datto.com>
+Date: Tue, 17 Apr 2018 14:13:57 -0400
+Subject: [PATCH] Fix object reclaim when using large dnodes
+
+Currently, when the receive_object() code wants to reclaim an
+object, it always assumes that the dnode is the legacy 512 bytes,
+even when the incoming bonus buffer exceeds this length. This
+causes a buffer overflow if --enable-debug is not provided and
+triggers an ASSERT if it is. This patch resolves this issue and
+adds an ASSERT to ensure this can't happen again.
+
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: Tom Caputi <tcaputi at datto.com>
+Closes #7097
+Closes #7433
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/dmu_object.c | 2 +-
+ module/zfs/dmu_send.c | 5 +++--
+ module/zfs/dnode.c | 3 +--
+ .../functional/rsend/send_realloc_dnode_size.ksh | 21 +++++++++++++++++----
+ 4 files changed, 22 insertions(+), 9 deletions(-)
+ mode change 100644 => 100755 tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
+
+diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
+index f53da407..1fc71d10 100644
+--- a/module/zfs/dmu_object.c
++++ b/module/zfs/dmu_object.c
+@@ -249,7 +249,7 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
+ int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+ {
+ return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
+- bonuslen, 0, tx));
++ bonuslen, DNODE_MIN_SIZE, tx));
+ }
+
+ int
+diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
+index 148b5ff8..1de0f316 100644
+--- a/module/zfs/dmu_send.c
++++ b/module/zfs/dmu_send.c
+@@ -2244,9 +2244,10 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ drro->drr_bonustype != doi.doi_bonus_type ||
+ drro->drr_bonuslen != doi.doi_bonus_size) {
+ /* currently allocated, but with different properties */
+- err = dmu_object_reclaim(rwa->os, drro->drr_object,
++ err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
+ drro->drr_type, drro->drr_blksz,
+- drro->drr_bonustype, drro->drr_bonuslen, tx);
++ drro->drr_bonustype, drro->drr_bonuslen,
++ drro->drr_dn_slots << DNODE_SHIFT, tx);
+ }
+ if (err != 0) {
+ dmu_tx_commit(tx);
+diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c
+index df6a4872..d465b545 100644
+--- a/module/zfs/dnode.c
++++ b/module/zfs/dnode.c
+@@ -662,8 +662,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
+ ASSERT(DMU_OT_IS_VALID(bonustype));
+ ASSERT3U(bonuslen, <=,
+ DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
+-
+- dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
++ ASSERT3U(bonuslen, <=, DN_BONUS_SIZE(dn_slots << DNODE_SHIFT));
+
+ dnode_free_interior_slots(dn);
+ DNODE_STAT_BUMP(dnode_reallocate);
+diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
+old mode 100644
+new mode 100755
+index 20676394..12a72fa0
+--- a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
++++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh
+@@ -13,6 +13,7 @@
+
+ #
+ # Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
++# Copyright (c) 2018 Datto Inc.
+ #
+
+ . $STF_SUITE/include/libtest.shlib
+@@ -31,8 +32,10 @@
+ # 3. Remove objects, set dnodesize=2k, and remount dataset so new objects
+ # overlap with recently recycled and formerly "normal" dnode slots get
+ # assigned to new objects
+-# 4. Generate initial and incremental streams
+-# 5. Verify initial and incremental streams can be received
++# 4. Create an empty file and add xattrs to it to exercise reclaiming a
++# dnode that requires more than 1 slot for its bonus buffer (Zol #7433)
++# 5. Generate initial and incremental streams
++# 6. Verify initial and incremental streams can be received
+ #
+
+ verify_runnable "both"
+@@ -44,6 +47,7 @@ function cleanup
+ rm -f $BACKDIR/fs-dn-legacy
+ rm -f $BACKDIR/fs-dn-1k
+ rm -f $BACKDIR/fs-dn-2k
++ rm -f $BACKDIR/fs-attr
+
+ if datasetexists $POOL/fs ; then
+ log_must zfs destroy -rR $POOL/fs
+@@ -82,17 +86,26 @@ log_must zfs unmount $POOL/fs
+ log_must zfs set dnodesize=2k $POOL/fs
+ log_must zfs mount $POOL/fs
+
++log_must touch /$POOL/fs/attrs
+ mk_files 200 262144 0 $POOL/fs
+ log_must zfs snapshot $POOL/fs@c
+
+-# 4. Generate initial and incremental streams
++# 4. Create an empty file and add xattrs to it to exercise reclaiming a
++# dnode that requires more than 1 slot for its bonus buffer (Zol #7433)
++log_must zfs set compression=on xattr=sa $POOL/fs
++log_must eval "python -c 'print \"a\" * 512' | attr -s bigval /$POOL/fs/attrs"
++log_must zfs snapshot $POOL/fs@d
++
++# 5. Generate initial and incremental streams
+ log_must eval "zfs send $POOL/fs@a > $BACKDIR/fs-dn-1k"
+ log_must eval "zfs send -i $POOL/fs@a $POOL/fs@b > $BACKDIR/fs-dn-legacy"
+ log_must eval "zfs send -i $POOL/fs@b $POOL/fs@c > $BACKDIR/fs-dn-2k"
++log_must eval "zfs send -i $POOL/fs@c $POOL/fs@d > $BACKDIR/fs-attr"
+
+-# 5. Verify initial and incremental streams can be received
++# 6. Verify initial and incremental streams can be received
+ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-1k"
+ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-legacy"
+ log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-dn-2k"
++log_must eval "zfs recv $POOL/newfs < $BACKDIR/fs-attr"
+
+ log_pass "Verify incremental receive handles objects with changed dnode size"
diff --git a/zfs-patches/0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch b/zfs-patches/0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch
new file mode 100644
index 0000000..c5a749e
--- /dev/null
+++ b/zfs-patches/0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch
@@ -0,0 +1,124 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tom Caputi <tcaputi at datto.com>
+Date: Thu, 28 Jun 2018 17:55:11 -0400
+Subject: [PATCH] Fix 'zfs recv' of non large_dnode send streams
+
+Currently, there is a bug where older send streams without the
+DMU_BACKUP_FEATURE_LARGE_DNODE flag are not handled correctly.
+The code in receive_object() fails to handle cases where
+drro->drr_dn_slots is set to 0, which is always the case when the
+sending code does not support this feature flag. This patch fixes
+the issue by ensuring that a value of 0 is treated as
+DNODE_MIN_SLOTS.
+
+Tested-by: DHE <git at dehacked.net>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: Tom Caputi <tcaputi at datto.com>
+Closes #7617
+Closes #7662
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/dmu_object.c | 3 +++
+ module/zfs/dmu_send.c | 33 +++++++++++++++++++++++++++------
+ 2 files changed, 30 insertions(+), 6 deletions(-)
+
+diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
+index 1fc71d10..40c25362 100644
+--- a/module/zfs/dmu_object.c
++++ b/module/zfs/dmu_object.c
+@@ -261,6 +261,9 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+ int dn_slots = dnodesize >> DNODE_SHIFT;
+ int err;
+
++ if (dn_slots == 0)
++ dn_slots = DNODE_MIN_SLOTS;
++
+ if (object == DMU_META_DNODE_OBJECT)
+ return (SET_ERROR(EBADF));
+
+diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
+index 1de0f316..13aae960 100644
+--- a/module/zfs/dmu_send.c
++++ b/module/zfs/dmu_send.c
+@@ -2139,6 +2139,8 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ dmu_tx_t *tx;
+ uint64_t object;
+ int err;
++ uint8_t dn_slots = drro->drr_dn_slots != 0 ?
++ drro->drr_dn_slots : DNODE_MIN_SLOTS;
+
+ if (drro->drr_type == DMU_OT_NONE ||
+ !DMU_OT_IS_VALID(drro->drr_type) ||
+@@ -2150,7 +2152,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(rwa->os)) ||
+ drro->drr_bonuslen >
+ DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(rwa->os))) ||
+- drro->drr_dn_slots >
++ dn_slots >
+ (spa_maxdnodesize(dmu_objset_spa(rwa->os)) >> DNODE_SHIFT)) {
+ return (SET_ERROR(EINVAL));
+ }
+@@ -2177,12 +2179,31 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+
+ if (drro->drr_blksz != doi.doi_data_block_size ||
+ nblkptr < doi.doi_nblkptr ||
+- drro->drr_dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
++ dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
+ 0, DMU_OBJECT_END);
+ if (err != 0)
+ return (SET_ERROR(EINVAL));
+ }
++
++ /*
++ * The dmu does not currently support decreasing nlevels
++ * on an object. For non-raw sends, this does not matter
++ * and the new object can just use the previous one's nlevels.
++ * For raw sends, however, the structure of the received dnode
++ * (including nlevels) must match that of the send side.
++ * Therefore, instead of using dmu_object_reclaim(), we must
++ * free the object completely and call dmu_object_claim_dnsize()
++ * instead.
++ */
++ if (dn_slots != doi.doi_dnodesize >> DNODE_SHIFT) {
++ err = dmu_free_long_object(rwa->os, drro->drr_object);
++ if (err != 0)
++ return (SET_ERROR(EINVAL));
++
++ txg_wait_synced(dmu_objset_pool(rwa->os), 0);
++ object = DMU_NEW_OBJECT;
++ }
+ } else if (err == EEXIST) {
+ /*
+ * The object requested is currently an interior slot of a
+@@ -2204,9 +2225,9 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ * another object from the previous snapshot. We must free
+ * these objects before we attempt to allocate the new dnode.
+ */
+- if (drro->drr_dn_slots > 1) {
++ if (dn_slots > 1) {
+ for (uint64_t slot = drro->drr_object + 1;
+- slot < drro->drr_object + drro->drr_dn_slots;
++ slot < drro->drr_object + dn_slots;
+ slot++) {
+ dmu_object_info_t slot_doi;
+
+@@ -2238,7 +2259,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
+ drro->drr_type, drro->drr_blksz,
+ drro->drr_bonustype, drro->drr_bonuslen,
+- drro->drr_dn_slots << DNODE_SHIFT, tx);
++ dn_slots << DNODE_SHIFT, tx);
+ } else if (drro->drr_type != doi.doi_type ||
+ drro->drr_blksz != doi.doi_data_block_size ||
+ drro->drr_bonustype != doi.doi_bonus_type ||
+@@ -2247,7 +2268,7 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ err = dmu_object_reclaim_dnsize(rwa->os, drro->drr_object,
+ drro->drr_type, drro->drr_blksz,
+ drro->drr_bonustype, drro->drr_bonuslen,
+- drro->drr_dn_slots << DNODE_SHIFT, tx);
++ dn_slots << DNODE_SHIFT, tx);
+ }
+ if (err != 0) {
+ dmu_tx_commit(tx);
diff --git a/zfs-patches/0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch b/zfs-patches/0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch
new file mode 100644
index 0000000..462cdbb
--- /dev/null
+++ b/zfs-patches/0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch
@@ -0,0 +1,42 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Georgy Yakovlev <ya at sysdump.net>
+Date: Thu, 10 May 2018 23:00:18 -0700
+Subject: [PATCH] Fix build with CONFIG_GCC_PLUGIN_RANDSTRUCT
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+fs/zfs/zfs/metaslab.c:1055:2: error: positional initialization of field
+in ‘struct’ declared with ‘designated_init’ attribute
+[-Werror=designated-init]
+ metaslab_rt_remove,
+
+Signed-off-by: Georgy Yakovlev <ya at sysdump.net>
+Reviewed-by: Giuseppe Di Natale <dinatale2 at llnl.gov>
+Closes: #7069
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/metaslab.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/module/zfs/metaslab.c b/module/zfs/metaslab.c
+index 5e413c06..ee24850d 100644
+--- a/module/zfs/metaslab.c
++++ b/module/zfs/metaslab.c
+@@ -1049,11 +1049,11 @@ metaslab_rt_vacate(range_tree_t *rt, void *arg)
+ }
+
+ static range_tree_ops_t metaslab_rt_ops = {
+- metaslab_rt_create,
+- metaslab_rt_destroy,
+- metaslab_rt_add,
+- metaslab_rt_remove,
+- metaslab_rt_vacate
++ .rtop_create = metaslab_rt_create,
++ .rtop_destroy = metaslab_rt_destroy,
++ .rtop_add = metaslab_rt_add,
++ .rtop_remove = metaslab_rt_remove,
++ .rtop_vacate = metaslab_rt_vacate
+ };
+
+ /*
diff --git a/zfs-patches/0033-Correctly-handle-errors-from-kern_path.patch b/zfs-patches/0033-Correctly-handle-errors-from-kern_path.patch
new file mode 100644
index 0000000..5df65fc
--- /dev/null
+++ b/zfs-patches/0033-Correctly-handle-errors-from-kern_path.patch
@@ -0,0 +1,35 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Chris Siebenmann <cks.github at cs.toronto.edu>
+Date: Wed, 5 Sep 2018 01:26:56 -0400
+Subject: [PATCH] Correctly handle errors from kern_path
+
+As a regular kernel function, kern_path() returns errors as negative
+errnos, such as -ELOOP. zfsctl_snapdir_vget() must convert these into
+the positive errnos used throughout the ZFS code when it returns them
+to other ZFS functions so that the ZFS code properly sees them as
+errors.
+
+Reviewed-by: George Melikov <mail at gmelikov.ru>
+Reviewed-by: Brian Behlendorf <behlendorf1 at llnl.gov>
+Signed-off-by: Chris Siebenmann <cks.git01 at cs.toronto.edu>
+Closes #7764
+Closes #7864
+
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ module/zfs/zfs_ctldir.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/module/zfs/zfs_ctldir.c b/module/zfs/zfs_ctldir.c
+index 25edea78..0ab5b4f0 100644
+--- a/module/zfs/zfs_ctldir.c
++++ b/module/zfs/zfs_ctldir.c
+@@ -1180,7 +1180,7 @@ zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
+ goto out;
+
+ /* Trigger automount */
+- error = kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
++ error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
+ if (error)
+ goto out;
+
diff --git a/zfs-patches/0034-Tag-zfs-0.7.10.patch b/zfs-patches/0034-Tag-zfs-0.7.10.patch
new file mode 100644
index 0000000..5c2f865
--- /dev/null
+++ b/zfs-patches/0034-Tag-zfs-0.7.10.patch
@@ -0,0 +1,56 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Tony Hutter <hutter2 at llnl.gov>
+Date: Wed, 5 Sep 2018 10:37:32 -0700
+Subject: [PATCH] Tag zfs-0.7.10
+
+META file and changelog updated.
+
+Signed-off-by: Tony Hutter <hutter2 at llnl.gov>
+Signed-off-by: Stoiko Ivanov <s.ivanov at proxmox.com>
+---
+ META | 2 +-
+ rpm/generic/zfs-kmod.spec.in | 3 +++
+ rpm/generic/zfs.spec.in | 3 +++
+ 3 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/META b/META
+index fbada64e..89525ac8 100644
+--- a/META
++++ b/META
+@@ -1,7 +1,7 @@
+ Meta: 1
+ Name: zfs
+ Branch: 1.0
+-Version: 0.7.9
++Version: 0.7.10
+ Release: 1
+ Release-Tags: relext
+ License: CDDL
+diff --git a/rpm/generic/zfs-kmod.spec.in b/rpm/generic/zfs-kmod.spec.in
+index a3678681..8306ea76 100644
+--- a/rpm/generic/zfs-kmod.spec.in
++++ b/rpm/generic/zfs-kmod.spec.in
+@@ -191,6 +191,9 @@ chmod u+x ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}/*/extra/*/*/*
+ rm -rf $RPM_BUILD_ROOT
+
+ %changelog
++* Wed Sep 05 2018 Tony Hutter <hutter2 at llnl.gov> - 0.7.10-1
++- Released 0.7.10-1, detailed release notes are available at:
++- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.10
+ * Tue May 08 2018 Tony Hutter <hutter2 at llnl.gov> - 0.7.9-1
+ - Released 0.7.9-1, detailed release notes are available at:
+ - https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.9
+diff --git a/rpm/generic/zfs.spec.in b/rpm/generic/zfs.spec.in
+index 22565725..76953aec 100644
+--- a/rpm/generic/zfs.spec.in
++++ b/rpm/generic/zfs.spec.in
+@@ -371,6 +371,9 @@ systemctl --system daemon-reload >/dev/null || true
+ %endif
+
+ %changelog
++* Wed Sep 05 2018 Tony Hutter <hutter2 at llnl.gov> - 0.7.10-1
++- Released 0.7.10-1, detailed release notes are available at:
++- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.10
+ * Tue May 08 2018 Tony Hutter <hutter2 at llnl.gov> - 0.7.9-1
+ - Released 0.7.9-1, detailed release notes are available at:
+ - https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.9
diff --git a/zfs-patches/series b/zfs-patches/series
index 5d154db..18cec2a 100644
--- a/zfs-patches/series
+++ b/zfs-patches/series
@@ -1,5 +1,34 @@
0001-remove-DKMS-modules-and-dracut-build.patch
0002-import-with-d-dev-disk-by-id-in-scan-service.patch
0003-always-load-ZFS-module-on-boot.patch
-0004-Fix-zpl_mount-deadlock.patch
-0005-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
+0004-Fix-deadlock-between-zfs-umount-snapentry_expire.patch
+0005-zv_suspend_lock-in-zvol_open-zvol_release.patch
+0006-Linux-4.18-compat-inode-timespec-timespec64.patch
+0007-Linux-compat-4.18-check_disk_size_change.patch
+0008-OpenZFS-8997-ztest-assertion-failure-in-zil_lwb_writ.patch
+0009-Fix-divide-by-zero-in-mmp_delay_update.patch
+0010-Fix-ENOSPC-in-Handle-zap_add-failures-in.patch
+0011-Trim-new-line-from-zfs_vdev_scheduler.patch
+0012-module-param-callbacks-check-for-initialized-spa.patch
+0013-Support-Debian-DKMS-builds.patch
+0014-zpool-reopen-should-detect-expanded-devices.patch
+0015-Add-pool-state-proc-entry-SUSPENDED-pools.patch
+0016-Linux-4.14-compat-blk_queue_stackable.patch
+0017-Default-ashift-for-Amazon-EC2-NVMe-devices.patch
+0018-Fix-kernel-unaligned-access-on-sparc64.patch
+0019-Fix-zpl_mount-deadlock.patch
+0020-OpenZFS-8906-uts-illumos-rootfs-should-support-salte.patch
+0021-Fix-zfs-incremental-send-remove-o-properties.patch
+0022-Allow-inherited-properties-in-zfs_check_settable.patch
+0023-Fix-arcstat.py-handling-of-unsupported-options.patch
+0024-Don-t-modify-argv-in-user-tools.patch
+0025-Add-missing-zfs-dracut-RPM-dependencies.patch
+0026-Add-libaio-devel-BuildRequires.patch
+0027-Fix-libaio-devel-requirement-for-Debian-based-distri.patch
+0028-Fedora-28-Fix-misc-bounds-check-compiler-warnings.patch
+0029-Fix-problems-receiving-reallocated-dnodes.patch
+0030-Fix-object-reclaim-when-using-large-dnodes.patch
+0031-Fix-zfs-recv-of-non-large_dnode-send-streams.patch
+0032-Fix-build-with-CONFIG_GCC_PLUGIN_RANDSTRUCT.patch
+0033-Correctly-handle-errors-from-kern_path.patch
+0034-Tag-zfs-0.7.10.patch
--
2.11.0
More information about the pve-devel
mailing list