[pve-devel] [RFC PATCH common] Tools: make file-locking aware of external exception sources

Wolfgang Bumiller w.bumiller at proxmox.com
Wed Feb 8 14:16:46 CET 2017


Previously an external exception (eg. caused by a SIGARLM in a code
which is already inside a run_with_timeout() call) could happen in
various places where we did not properly this situation.
For instance after calling $lock_func() but before reaching the cleanup
code. In this case a lock was leaked.
Additionally the code was broken in that it used perl's automatic hash
creation side effect ($a->{x}->{y} implicitly initializing $a->{x} with
an empty hash when it did not exist). The effect was that if our own
time out was triggered after the initial check for an existing file
handle inside $lock_func() happened (extremely rare since perl would have
to be running insanely slow), the cleanup did:

    if (my $fh = $lock_handles->{$$}->{$filename}->{fh}) {

This recreated $lock_handles->{$$}->{$filename} as an empty hash.
A subsequent call to lock_file_full() will think a file descriptor
already exists because the check simply used:

    if (!$lock_handles->{$$}->{$filename}) {

While this could have been a one-line fix for this one particular case,
we'd still not be taking external timeouts into account causing the
first issue described above.

---
Dealing with race conditions in single threaded code - awesome.

 src/PVE/Tools.pm | 95 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 55 insertions(+), 40 deletions(-)

diff --git a/src/PVE/Tools.pm b/src/PVE/Tools.pm
index df66047..6ea2f33 100644
--- a/src/PVE/Tools.pm
+++ b/src/PVE/Tools.pm
@@ -25,6 +25,7 @@ use Time::HiRes qw(usleep gettimeofday tv_interval alarm);
 use Net::DBus qw(dbus_uint32 dbus_uint64);
 use Net::DBus::Callback;
 use Net::DBus::Reactor;
+use Scalar::Util 'weaken';
 
 # avoid warning when parsing long hex values with hex()
 no warnings 'portable'; # Support for 64-bit ints required
@@ -122,7 +123,12 @@ sub run_with_timeout {
 }
 
 # flock: we use one file handle per process, so lock file
-# can be called multiple times and succeeds for the same process.
+# can be nested multiple times and succeeds for the same process.
+#
+# Since this is the only way we lock now and we don't have the old
+# 'lock(); code(); unlock();' pattern anymore we do not actually need to
+# count how deep we're nesting. Therefore this hash now stores a weak reference
+# to a boolean telling us whether we already have a lock.
 
 my $lock_handles =  {};
 
@@ -133,58 +139,67 @@ sub lock_file_full {
 
     my $mode = $shared ? LOCK_SH : LOCK_EX;
 
-    my $lock_func = sub {
-        if (!$lock_handles->{$$}->{$filename}) {
-	    my $fh = new IO::File(">>$filename") ||
-		die "can't open file - $!\n";
-	    $lock_handles->{$$}->{$filename} = { fh => $fh, refcount => 0};
-        }
+    my $lockhash = ($lock_handles->{$$} //= {});
+
+    # Returns a locked file handle.
+    my $get_locked_file = sub {
+	my $fh = IO::File->new(">>$filename")
+	    or die "can't open file - $!\n";
 
-        if (!flock($lock_handles->{$$}->{$filename}->{fh}, $mode|LOCK_NB)) {
-            print STDERR "trying to acquire lock...";
+	if (!flock($fh, $mode|LOCK_NB)) {
+	    print STDERR "trying to acquire lock...";
 	    my $success;
 	    while(1) {
-		$success = flock($lock_handles->{$$}->{$filename}->{fh}, $mode);
+		$success = flock($fh, $mode);
 		# try again on EINTR (see bug #273)
 		if ($success || ($! != EINTR)) {
 		    last;
 		}
 	    }
-            if (!$success) {
-                print STDERR " failed\n";
-                die "can't acquire lock '$filename' - $!\n";
-            }
-            print STDERR " OK\n";
-        }
-	$lock_handles->{$$}->{$filename}->{refcount}++;
+	    if (!$success) {
+		print STDERR " failed\n";
+		die "can't acquire lock '$filename' - $!\n";
+	    }
+	    print STDERR " OK\n";
+	}
+
+	return $fh;
     };
 
     my $res;
-
-    eval { run_with_timeout($timeout, $lock_func); };
-    my $err = $@;
-    if ($err) {
-	$err = "can't lock file '$filename' - $err";
-    } else {
-	eval { $res = &$code(@param) };
-	$err = $@;
-    }
-
-    if (my $fh = $lock_handles->{$$}->{$filename}->{fh}) {
-	my $refcount = --$lock_handles->{$$}->{$filename}->{refcount};
-	if ($refcount <= 0) {
-	    $lock_handles->{$$}->{$filename} = undef;
-	    close ($fh);
+    my $checkptr = $lockhash->{$filename};
+    my $local_fh; # This must stay local
+    if (!$checkptr || !$$checkptr) {
+	# We cannot create a weak reference in a single atomic step, so we first
+	# create a false-value, then create a reference to it, then weaken it,
+	# and after successfully locking the file we change the boolean value.
+	#
+	# The reason for this is that if an outer SIGALRM throws an exception
+	# between creating the reference and weakening it, a subsequent call to
+	# lock_file_full() will see a leftover full reference to a valid
+	# variable. This variable must be 0 in order for said call to attempt to
+	# lock the file anew.
+	#
+	# An externally triggered exception elsewhere in the code will cause the
+	# weak reference to become 'undef', and since the file handle is only
+	# stored in the local scope in $local_fh, the file will be closed by
+	# perl's cleanup routines as well.
+	#
+	# This still assumes that an IO::File handle can properly deal with such
+	# exceptions thrown during its own destruction, but that's up to perls
+	# guts now.
+	$checkptr = 0;
+	$lockhash->{$filename} = \$checkptr;
+	weaken $lockhash->{$filename};
+	$local_fh = eval { run_with_timeout($timeout, $get_locked_file) };
+	if ($@) {
+	    $@ = "can't lock file '$filename' - $@";
+	    return undef;
 	}
+	$checkptr = 1;
     }
-
-    if ($err) {
-        $@ = $err;
-        return undef;
-    }
-
-    $@ = undef;
-
+    $res = eval { &$code(@param); };
+    return undef if $@;
     return $res;
 }
 
-- 
2.1.4





More information about the pve-devel mailing list