[pbs-devel] [PATCH proxmox-backup] pbs-tools: zip: add conditional EFS flag to zip files

Dominik Csapak d.csapak at proxmox.com
Mon Sep 13 10:06:57 CEST 2021


this flag marks the file names as 'UTF-8' encoded if they are valid
UTF-8.

By default, the encoding of file names in zip archives is defined as
code page 437, but we save the filenames as bytes (like in Linux fs).

For Linux systems this would not be a problem, since most tools
simply use the filenames as bytes, but for the zip utility under
Windows it's important since NTFS uses UTF-16 for file names.

Filenames that are valid UTF-8 are decoded correctly as UTF-8 everywhere
(Linux as UTF-8 bytes, Windows as the correct UTF-16 sequence). For
other filenames with a high bit set, what exactly happens depends on
the OS/software. Some cases below:

* Windows + Built-in/7zip: decoded as CP437
* Debian + zip: Bytes taken as-is
* Debian + 7z: interpreted as Windows1252, decoded as UTF-8

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
changes from RFC:
* set EFS flag conditionally when filename is valid UTF-8
* fix typo in const name
* proper comments for consts

 pbs-tools/src/zip.rs | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/pbs-tools/src/zip.rs b/pbs-tools/src/zip.rs
index 605480a8..62ebd4cf 100644
--- a/pbs-tools/src/zip.rs
+++ b/pbs-tools/src/zip.rs
@@ -34,6 +34,9 @@ const VERSION_MADE_BY: u16 = 0x032d;
 const ZIP64_EOCD_RECORD: u32 = 0x06064B50;
 const ZIP64_EOCD_LOCATOR: u32 = 0x07064B50;
 
+const LFH_GENERAL_PURPOSE_FLAGS: u16 = 1 << 3; // we place crc32 in the data descriptor
+const LFH_GPF_EFS_BIT: u16 = 1 << 11; // EFS, marks filename & comment as UTF-8
+
 // bits for time:
 // 0-4: day of the month (1-31)
 // 5-8: month: (1 = jan, etc.)
@@ -200,8 +203,11 @@ pub struct ZipEntry {
     compressed_size: u64,
     offset: u64,
     is_file: bool,
+    is_utf8_filename: bool,
 }
 
+
+
 impl ZipEntry {
     /// Creates a new ZipEntry
     ///
@@ -220,8 +226,11 @@ impl ZipEntry {
             relpath.push(""); // adds trailing slash
         }
 
+        let filename: OsString = relpath.into();
+        let is_utf8_filename = filename.to_str().is_some();
+
         Self {
-            filename: relpath.into(),
+            filename,
             crc32: 0,
             mtime,
             mode,
@@ -229,6 +238,15 @@ impl ZipEntry {
             compressed_size: 0,
             offset: 0,
             is_file,
+            is_utf8_filename,
+        }
+    }
+
+    fn get_general_purpose_flags(&self) -> u16 {
+        if self.is_utf8_filename {
+            LFH_GENERAL_PURPOSE_FLAGS | LFH_GPF_EFS_BIT
+        } else {
+            LFH_GENERAL_PURPOSE_FLAGS
         }
     }
 
@@ -249,7 +267,7 @@ impl ZipEntry {
             LocalFileHeader {
                 signature: LOCAL_FH_SIG,
                 version_needed: 0x2d,
-                flags: 1 << 3,
+                flags: self.get_general_purpose_flags(),
                 compression: 0x8,
                 time,
                 date,
@@ -332,7 +350,7 @@ impl ZipEntry {
                 signature: CENTRAL_DIRECTORY_FH_SIG,
                 version_made_by: VERSION_MADE_BY,
                 version_needed: VERSION_NEEDED,
-                flags: 1 << 3,
+                flags: self.get_general_purpose_flags(),
                 compression: 0x8,
                 time,
                 date,
-- 
2.30.2






More information about the pbs-devel mailing list