[pbs-devel] [PATCH proxmox-backup] tape/drive: improve tape device locking behaviour

Dominik Csapak d.csapak at proxmox.com
Wed Jun 2 10:19:36 CEST 2021


by passing through errors if they are of the 'Interrupted' kind,
since that happens mostly when the lock is interrupted by the
timeout timer signal.

In the api, check in the worker loop for exactly this error and continue only
then. All other errors lead to a aborted task.

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
an alternative solution would be to change the function signature to
return an Option<Guard> instead and check that, but this would
be a 'weird' interface for a locking function...

 src/api2/tape/backup.rs | 20 +++++++++++++++-----
 src/tape/drive/mod.rs   | 10 ++++++++--
 2 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/src/api2/tape/backup.rs b/src/api2/tape/backup.rs
index 77b11bb0..7e1de88e 100644
--- a/src/api2/tape/backup.rs
+++ b/src/api2/tape/backup.rs
@@ -203,12 +203,22 @@ pub fn do_tape_backup_job(
                     // for scheduled tape backup jobs, we wait indefinitely for the lock
                     task_log!(worker, "waiting for drive lock...");
                     loop {
-                        if let Ok(lock) = lock_tape_device(&drive_config, &setup.drive) {
-                            drive_lock = Some(lock);
-                            break;
-                        } // ignore errors
-
                         worker.check_abort()?;
+                        match lock_tape_device(&drive_config, &setup.drive) {
+                            Ok(lock) => {
+                                drive_lock = Some(lock);
+                                break;
+                            }
+                            Err(err) => {
+                                if let Some(err) = err.downcast_ref::<std::io::Error>() {
+                                    if err.kind() == std::io::ErrorKind::Interrupted {
+                                        // locking was probably interrupted due to a timeout
+                                        continue;
+                                    }
+                                }
+                                return Err(err);
+                            }
+                        }
                     }
                 }
                 set_tape_device_state(&setup.drive, &worker.upid().to_string())?;
diff --git a/src/tape/drive/mod.rs b/src/tape/drive/mod.rs
index f72e0b51..5cc81924 100644
--- a/src/tape/drive/mod.rs
+++ b/src/tape/drive/mod.rs
@@ -485,8 +485,14 @@ pub fn lock_tape_device(
     drive: &str,
 ) -> Result<DeviceLockGuard, Error> {
     let path = tape_device_path(config, drive)?;
-    lock_device_path(&path)
-        .map_err(|err| format_err!("unable to lock drive '{}' - {}", drive, err))
+    match lock_device_path(&path) {
+        Ok(lock) => Ok(lock),
+        // we do not change interrrupted errors, so that the caller can catch that
+        Err(err) => match err.downcast_ref::<std::io::Error>() {
+            Some(e) if e.kind() == std::io::ErrorKind::Interrupted => Err(err),
+            _ => bail!("unable to lock drive '{}' - {}", drive, err),
+        }
+    }
 }
 
 /// Writes the given state for the specified drive
-- 
2.20.1






More information about the pbs-devel mailing list