[pbs-devel] [PATCH proxmox-backup v3 1/3] tools: add zip module

Dominik Csapak d.csapak at proxmox.com
Wed Oct 21 09:29:06 CEST 2020


This module contains the 'ZipEncoder' struct, which wraps an async writer
to create a ZIP archive on the fly.

To create a ZIP file, take a target that implements AsyncWrite,
pass it to ZipEncoder::new, add entries via 'add_entry' and,
at the end, call 'finish'.

For now, this does not implement compression (it uses ZIP's STORE mode), and
it does not support empty directories or hardlinks (or any other special
files).

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
changes from v2:
* use ByteBuffer instead of Vec<u8> and read_buf (also increase buf size to 1M)

 src/tools.rs     |   1 +
 src/tools/zip.rs | 518 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 519 insertions(+)
 create mode 100644 src/tools/zip.rs

diff --git a/src/tools.rs b/src/tools.rs
index 1837c0e0..5a9f020a 100644
--- a/src/tools.rs
+++ b/src/tools.rs
@@ -36,6 +36,7 @@ pub mod logrotate;
 pub mod loopdev;
 pub mod fuse_loop;
 pub mod socket;
+pub mod zip;
 
 mod parallel_handler;
 pub use parallel_handler::*;
diff --git a/src/tools/zip.rs b/src/tools/zip.rs
new file mode 100644
index 00000000..3248239f
--- /dev/null
+++ b/src/tools/zip.rs
@@ -0,0 +1,518 @@
+//! ZIP Helper
+//!
+//! Provides an interface to create a ZIP file from ZipEntries.
+//! For a more detailed description of the ZIP format, see:
+//! <https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT>
+
+use std::convert::TryInto;
+use std::ffi::OsString;
+use std::io;
+use std::mem::size_of;
+use std::os::unix::ffi::OsStrExt;
+use std::path::{Component, Path, PathBuf};
+
+use anyhow::{Error, Result};
+use endian_trait::Endian;
+use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
+
+use crc32fast::Hasher;
+use proxmox::tools::time::gmtime;
+use proxmox::tools::byte_buffer::ByteBuffer;
+
+const LOCAL_FH_SIG: u32 = 0x04034B50; // signature of LocalFileHeader (bytes "PK\x03\x04" once written little-endian)
+const LOCAL_FF_SIG: u32 = 0x08074B50; // signature of LocalFileFooter, written after each entry's data
+const CENTRAL_DIRECTORY_FH_SIG: u32 = 0x02014B50; // signature of CentralDirectoryFileHeader
+const END_OF_CENTRAL_DIR: u32 = 0x06054B50; // signature of EndOfCentralDir
+const VERSION_NEEDED: u16 = 0x002d; // 45 decimal; presumably ZIP spec version 4.5 (ZIP64) - confirm against APPNOTE
+const VERSION_MADE_BY: u16 = 0x032d; // low byte as VERSION_NEEDED; high byte 3 presumably marks a UNIX host - confirm
+
+const ZIP64_EOCD_RECORD: u32 = 0x06064B50; // signature of Zip64EOCDRecord
+const ZIP64_EOCD_LOCATOR: u32 = 0x07064B50; // signature of Zip64EOCDLocator
+
+// Converts a UNIX epoch into the MS-DOS (date, time) pair; (0, 0) if gmtime() fails.
+// bits for date (0 if the year is outside 1980..=2107, the range of the 7 bit field):
+// 0-4: day of the month (1-31)
+// 5-8: month: (1 = jan, etc.)
+// 9-15: year offset from 1980
+//
+// bits for time:
+// 0-4: second / 2
+// 5-10: minute (0-59)
+// 11-15: hour (0-23)
+// see https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime
+fn epoch_to_dos(epoch: i64) -> (u16, u16) {
+    let gmtime = match gmtime(epoch) {
+        Ok(gmtime) => gmtime,
+        Err(_) => return (0, 0),
+    };
+
+    let seconds = (gmtime.tm_sec / 2) & 0b11111; // only 2 second resolution fits into 5 bits
+    let minutes = gmtime.tm_min & 0b111111; // 6 bit mask
+    let hours = gmtime.tm_hour & 0b11111;
+    let time: u16 = ((hours << 11) | (minutes << 5) | (seconds)) as u16;
+
+    let date: u16 = if gmtime.tm_year > (2107 - 1900) || gmtime.tm_year < (1980 - 1900) {
+        0
+    } else {
+        let day = gmtime.tm_mday & 0b11111;
+        let month = (gmtime.tm_mon + 1) & 0b1111; // +1 because tm_mon is 0-based
+        let year = (gmtime.tm_year + 1900 - 1980) & 0b1111111; // tm_year counts from 1900
+        ((year << 9) | (month << 5) | (day)) as u16
+    };
+
+    (date, time)
+}
+
+#[derive(Endian)]
+#[repr(C, packed)] // packed layout; write_struct() writes the struct's bytes as-is
+struct Zip64Field { // extra field that ZipEntry::write_local_header() writes after the filename
+    field_type: u16, // written as 0x0001
+    field_size: u16, // written as 2 * 8, i.e. the two u64 fields below
+    uncompressed_size: u64, // written as 0 in the local header
+    compressed_size: u64, // written as 0 in the local header
+}
+
+#[derive(Endian)]
+#[repr(C, packed)] // packed layout; write_struct() writes the struct's bytes as-is
+struct Zip64FieldWithOffset { // extra field written by ZipEntry::write_central_directory_header()
+    field_type: u16, // written as 1
+    field_size: u16, // written as 3 * 8, i.e. the three u64 fields below
+    uncompressed_size: u64, // ZipEntry::uncompressed_size
+    compressed_size: u64, // ZipEntry::compressed_size
+    offset: u64, // ZipEntry::offset, which add_entry() sets before writing the local header
+}
+
+#[derive(Endian)]
+#[repr(C, packed)] // packed layout; write_struct() writes the struct's bytes as-is
+struct LocalFileHeader { // written by ZipEntry::write_local_header(), followed by the filename
+    signature: u32, // LOCAL_FH_SIG
+    version_needed: u16, // written as 0x2d (the value of VERSION_NEEDED)
+    flags: u16, // written as 1 << 3
+    compression: u16, // written as 0
+    time: u16, // from epoch_to_dos(mtime)
+    date: u16, // from epoch_to_dos(mtime)
+    crc32: u32, // written as 0 here; the real CRC goes into LocalFileFooter
+    compressed_size: u32, // written as 0xFFFFFFFF
+    uncompressed_size: u32, // written as 0xFFFFFFFF
+    filename_len: u16, // byte length of the filename that follows the header
+    extra_field_len: u16, // size_of::<Zip64Field>()
+}
+
+#[derive(Endian)]
+#[repr(C, packed)] // packed layout; write_struct() writes the struct's bytes as-is
+struct LocalFileFooter { // written by ZipEntry::write_data_descriptor() after the entry's content
+    signature: u32, // LOCAL_FF_SIG
+    crc32: u32, // ZipEntry::crc32
+    compressed_size: u64, // ZipEntry::compressed_size
+    uncompressed_size: u64, // ZipEntry::uncompressed_size
+}
+
+#[derive(Endian)]
+#[repr(C, packed)] // packed layout; write_struct() writes the struct's bytes as-is
+struct CentralDirectoryFileHeader { // written by ZipEntry::write_central_directory_header()
+    signature: u32, // CENTRAL_DIRECTORY_FH_SIG
+    version_made_by: u16, // VERSION_MADE_BY
+    version_needed: u16, // VERSION_NEEDED
+    flags: u16, // written as 1 << 3, same as in LocalFileHeader
+    compression: u16, // written as 0
+    time: u16, // from epoch_to_dos(mtime)
+    date: u16, // from epoch_to_dos(mtime)
+    crc32: u32, // ZipEntry::crc32
+    compressed_size: u32, // written as 0xFFFFFFFF; the u64 value is in Zip64FieldWithOffset
+    uncompressed_size: u32, // written as 0xFFFFFFFF; the u64 value is in Zip64FieldWithOffset
+    filename_len: u16, // byte length of the filename that follows the header
+    extra_field_len: u16, // size_of::<Zip64FieldWithOffset>()
+    comment_len: u16, // written as 0
+    start_disk: u16, // written as 0
+    internal_flags: u16, // written as 0
+    external_flags: u32, // (mode << 16) | (1 << 4 when the entry is not a file)
+    offset: u32, // written as 0xFFFFFFFF; the u64 value is in Zip64FieldWithOffset
+}
+
+#[derive(Endian)]
+#[repr(C, packed)] // packed layout; write_struct() writes the struct's bytes as-is
+struct EndOfCentralDir { // last record written by ZipEncoder::write_eocd()
+    signature: u32, // END_OF_CENTRAL_DIR
+    disk_number: u16, // written as 0
+    start_disk: u16, // written as 0
+    disk_record_count: u16, // number of entries, or 0xFFFF when the ZIP64 records are written
+    total_record_count: u16, // same value as disk_record_count
+    directory_size: u32, // central directory size in bytes, or 0xFFFFFFFF with ZIP64 records
+    directory_offset: u32, // central directory start offset, or 0xFFFFFFFF with ZIP64 records
+    comment_len: u16, // written as 0
+}
+
+#[derive(Endian)]
+#[repr(C, packed)] // packed layout; write_struct() writes the struct's bytes as-is
+struct Zip64EOCDRecord { // written by write_eocd() only if a 16/32 bit EndOfCentralDir field would overflow
+    signature: u32, // ZIP64_EOCD_RECORD
+    field_size: u64, // written as 44: the 56 byte struct minus signature and field_size
+    version_made_by: u16, // VERSION_MADE_BY
+    version_needed: u16, // VERSION_NEEDED
+    disk_number: u32, // written as 0
+    disk_number_central_dir: u32, // written as 0
+    disk_record_count: u64, // number of entries added to the encoder
+    total_record_count: u64, // same value as disk_record_count
+    directory_size: u64, // central directory size in bytes
+    directory_offset: u64, // central directory start offset
+}
+
+#[derive(Endian)]
+#[repr(C, packed)] // packed layout; write_struct() writes the struct's bytes as-is
+struct Zip64EOCDLocator { // written by ZipEncoder::write_eocd() right after Zip64EOCDRecord
+    signature: u32, // ZIP64_EOCD_LOCATOR
+    disk_number: u32, // written as 0
+    offset: u64, // central_dir_offset + central_dir_size, i.e. where Zip64EOCDRecord was written
+    disk_count: u32, // written as 1
+}
+
+/// Converts `data` with `to_le()` and writes its raw bytes to `output`.
+async fn write_struct<E, T>(output: &mut T, data: E) -> io::Result<()>
+where
+    E: Endian,
+    T: AsyncWrite + ?Sized + Unpin,
+{
+    let le = data.to_le();
+    let ptr = &le as *const E as *const u8;
+
+    // SAFETY: `le` is an initialized local that outlives `bytes`, and the
+    // size_of::<E>() bytes at `ptr` all belong to it.
+    // NOTE(review): assumes E has no padding bytes (callers pass packed structs)
+    let bytes = unsafe { std::slice::from_raw_parts(ptr, size_of::<E>()) };
+    output.write_all(bytes).await
+}
+
+/// Represents an Entry in a ZIP File
+///
+/// used to add to a ZipEncoder
+pub struct ZipEntry {
+    filename: OsString, // relative path built by new(); gets a trailing separator if is_file is false
+    mtime: i64, // passed to epoch_to_dos() when the headers are written
+    mode: u16, // stored in the upper 16 bits of external_flags in the central directory
+    crc32: u32, // 0 until add_entry() stores the CRC32 of the content
+    uncompressed_size: u64, // 0 until add_entry() stores the number of content bytes
+    compressed_size: u64, // add_entry() stores the same value as uncompressed_size
+    offset: u64, // set by add_entry() to the byte count written before this entry's local header
+    is_file: bool, // false sets bit 4 of external_flags in the central directory
+}
+
+impl ZipEntry {
+    /// Creates a new ZipEntry
+    ///
+    /// Only the normal components of `path` are kept (no root, prefix, `.` or `..`).
+    /// If is_file is false the path will contain a trailing separator,
+    /// so that the zip file understands that it is a directory
+    pub fn new<P: AsRef<Path>>(path: P, mtime: i64, mode: u16, is_file: bool) -> Self {
+        let mut relpath = PathBuf::new();
+
+        for comp in path.as_ref().components() {
+            if let Component::Normal(_) = comp {
+                relpath.push(comp);
+            }
+        }
+
+        if !is_file {
+            relpath.push(""); // adds trailing slash
+        }
+
+        Self {
+            filename: relpath.into(),
+            crc32: 0,
+            mtime,
+            mode,
+            uncompressed_size: 0,
+            compressed_size: 0,
+            offset: 0,
+            is_file,
+        }
+    }
+
+    /// Returns the filename length in bytes as stored in the headers.
+    ///
+    /// Fails with `InvalidInput` if it does not fit the 16 bit length field.
+    fn filename_len(&self) -> io::Result<u16> {
+        self.filename.as_bytes().len().try_into().map_err(|_| {
+            io::Error::new(io::ErrorKind::InvalidInput, "filename too long for ZIP entry")
+        })
+    }
+
+    /// Writes the local file header, the filename and a ZIP64 extra field to `buf`.
+    ///
+    /// CRC and sizes are not known yet, so only placeholders are written; the real
+    /// values are emitted by `write_data_descriptor`. Returns the bytes written.
+    async fn write_local_header<W>(&self, mut buf: &mut W) -> io::Result<usize>
+    where
+        W: AsyncWrite + Unpin + ?Sized,
+    {
+        let filename = self.filename.as_bytes();
+        let filename_len = self.filename_len()?;
+        let zip_field_size = size_of::<Zip64Field>();
+        let (date, time) = epoch_to_dos(self.mtime);
+
+        let header = LocalFileHeader {
+            signature: LOCAL_FH_SIG,
+            version_needed: VERSION_NEEDED,
+            flags: 1 << 3,
+            compression: 0,
+            time,
+            date,
+            crc32: 0,
+            // 32 bit sizes are all-ones; 64 bit sizes go into the extra field / data descriptor
+            compressed_size: 0xFFFFFFFF,
+            uncompressed_size: 0xFFFFFFFF,
+            filename_len,
+            extra_field_len: zip_field_size as u16,
+        };
+        write_struct(&mut buf, header).await?;
+
+        buf.write_all(filename).await?;
+
+        let extra = Zip64Field {
+            field_type: 0x0001,
+            field_size: 2 * 8,
+            uncompressed_size: 0,
+            compressed_size: 0,
+        };
+        write_struct(&mut buf, extra).await?;
+
+        Ok(size_of::<LocalFileHeader>() + filename.len() + zip_field_size)
+    }
+
+    /// Writes the data descriptor carrying the final CRC32 and 64 bit sizes to `buf`.
+    ///
+    /// Returns the number of bytes written.
+    async fn write_data_descriptor<W: AsyncWrite + Unpin + ?Sized>(
+        &self,
+        mut buf: &mut W,
+    ) -> io::Result<usize> {
+        let footer = LocalFileFooter {
+            signature: LOCAL_FF_SIG,
+            crc32: self.crc32,
+            compressed_size: self.compressed_size,
+            uncompressed_size: self.uncompressed_size,
+        };
+        write_struct(&mut buf, footer).await?;
+
+        Ok(size_of::<LocalFileFooter>())
+    }
+
+    /// Writes this entry's central directory header, filename and ZIP64 extra field.
+    ///
+    /// Returns the number of bytes written.
+    async fn write_central_directory_header<W: AsyncWrite + Unpin + ?Sized>(
+        &self,
+        mut buf: &mut W,
+    ) -> io::Result<usize> {
+        let filename = self.filename.as_bytes();
+        let filename_len = self.filename_len()?;
+        let zip_field_size = size_of::<Zip64FieldWithOffset>();
+        let (date, time) = epoch_to_dos(self.mtime);
+
+        let header = CentralDirectoryFileHeader {
+            signature: CENTRAL_DIRECTORY_FH_SIG,
+            version_made_by: VERSION_MADE_BY,
+            version_needed: VERSION_NEEDED,
+            flags: 1 << 3,
+            compression: 0,
+            time,
+            date,
+            crc32: self.crc32,
+            // 32 bit sizes and offset are all-ones; the 64 bit values go into the extra field
+            compressed_size: 0xFFFFFFFF,
+            uncompressed_size: 0xFFFFFFFF,
+            filename_len,
+            extra_field_len: zip_field_size as u16,
+            comment_len: 0,
+            start_disk: 0,
+            internal_flags: 0,
+            // mode in the upper 16 bits, bit 4 set when the entry is not a file
+            external_flags: (self.mode as u32) << 16 | (!self.is_file as u32) << 4,
+            offset: 0xFFFFFFFF,
+        };
+        write_struct(&mut buf, header).await?;
+
+        buf.write_all(filename).await?;
+
+        let extra = Zip64FieldWithOffset {
+            field_type: 1,
+            field_size: 3 * 8,
+            uncompressed_size: self.uncompressed_size,
+            compressed_size: self.compressed_size,
+            offset: self.offset,
+        };
+        write_struct(&mut buf, extra).await?;
+
+        Ok(size_of::<CentralDirectoryFileHeader>() + filename.len() + zip_field_size)
+    }
+}
+
+/// Wraps a writer that implements AsyncWrite for creating a ZIP archive
+///
+/// This will create a ZIP archive on the fly with files added with
+/// 'add_entry'. To finish the file, call 'finish'
+/// Example:
+/// ```no_run
+/// use proxmox_backup::tools::zip::*;
+/// use tokio::fs::File;
+///
+/// #[tokio::main]
+/// async fn main() -> Result<(), anyhow::Error> {
+///     let target = File::create("foo.zip").await?;
+///     let source = File::open("foo.txt").await?;
+///
+///     let mut zip = ZipEncoder::new(target);
+///     zip.add_entry(ZipEntry::new(
+///         "foo.txt",
+///         0,
+///         0o100755,
+///         true,
+///     ), Some(source)).await?;
+///
+///     zip.finish().await?;
+///
+///     Ok(())
+/// }
+/// ```
+pub struct ZipEncoder<W>
+where
+    W: AsyncWrite + Unpin,
+{
+    byte_count: usize, // bytes written by add_entry() so far; offset of the next entry / central directory
+    files: Vec<ZipEntry>, // entries already added, kept for the central directory written by finish()
+    target: W, // the wrapped writer that receives the archive
+    buf: ByteBuffer, // buffer used by add_entry() to copy entry content
+}
+
+impl<W: AsyncWrite + Unpin> ZipEncoder<W> {
+    /// Creates an encoder that writes the archive to `target`.
+    ///
+    /// Entry content is copied through an internal buffer of 1024 * 1024 capacity.
+    pub fn new(target: W) -> Self {
+        let buf = ByteBuffer::with_capacity(1024 * 1024);
+        Self { byte_count: 0, files: Vec::new(), target, buf }
+    }
+
+    /// Appends `entry` to the archive, streaming `content` (if any) into it.
+    ///
+    /// Writes the local header, the content and the data descriptor to the
+    /// target, filling in offset, sizes and CRC32 of `entry` on the way. The
+    /// entry is remembered for the central directory written by `finish`.
+    ///
+    /// `content` is read until it reports end of file (a read of 0 bytes).
+    pub async fn add_entry<R: AsyncRead + Unpin>(
+        &mut self,
+        mut entry: ZipEntry,
+        content: Option<R>,
+    ) -> Result<(), Error> {
+        entry.offset = self.byte_count.try_into()?;
+        self.byte_count += entry.write_local_header(&mut self.target).await?;
+
+        if let Some(mut reader) = content {
+            let mut crc = Hasher::new();
+            let mut total = 0;
+
+            loop {
+                let read = self.buf.read_from_async(&mut reader).await?;
+                if read == 0 {
+                    break; // end of file
+                }
+
+                total += read;
+                crc.update(&self.buf);
+                self.target.write_all(&self.buf).await?;
+                self.buf.consume(read);
+            }
+
+            self.byte_count += total;
+            let stored: u64 = total.try_into()?;
+            entry.compressed_size = stored;
+            entry.uncompressed_size = stored;
+            entry.crc32 = crc.finalize();
+        }
+
+        self.byte_count += entry.write_data_descriptor(&mut self.target).await?;
+        self.files.push(entry);
+
+        Ok(())
+    }
+
+    /// Writes the end of central directory record, preceded by the ZIP64
+    /// record and locator when a value exceeds the classic 16/32 bit fields.
+    ///
+    /// `central_dir_size` and `central_dir_offset` are the byte counts computed by `finish`.
+    async fn write_eocd(
+        &mut self,
+        central_dir_size: usize,
+        central_dir_offset: usize,
+    ) -> Result<(), Error> {
+        let entrycount = self.files.len();
+
+        let needs_zip64 = central_dir_size > u32::MAX as usize
+            || central_dir_offset > u32::MAX as usize
+            || entrycount > u16::MAX as usize;
+
+        if needs_zip64 {
+            let record = Zip64EOCDRecord {
+                signature: ZIP64_EOCD_RECORD,
+                field_size: 44,
+                version_made_by: VERSION_MADE_BY,
+                version_needed: VERSION_NEEDED,
+                disk_number: 0,
+                disk_number_central_dir: 0,
+                disk_record_count: entrycount.try_into()?,
+                total_record_count: entrycount.try_into()?,
+                directory_size: central_dir_size.try_into()?,
+                directory_offset: central_dir_offset.try_into()?,
+            };
+            write_struct(&mut self.target, record).await?;
+
+            // the ZIP64 record was written right behind the central directory
+            let locator = Zip64EOCDLocator {
+                signature: ZIP64_EOCD_LOCATOR,
+                disk_number: 0,
+                offset: (central_dir_offset + central_dir_size).try_into()?,
+                disk_count: 1,
+            };
+            write_struct(&mut self.target, locator).await?;
+        }
+
+        // the classic record carries all-ones values once the ZIP64 records are in use
+        let (count, directory_size, directory_offset) = if needs_zip64 {
+            (0xFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
+        } else {
+            (entrycount as u16, central_dir_size as u32, central_dir_offset as u32)
+        };
+
+        let eocd = EndOfCentralDir {
+            signature: END_OF_CENTRAL_DIR,
+            disk_number: 0,
+            start_disk: 0,
+            disk_record_count: count,
+            total_record_count: count,
+            directory_size,
+            directory_offset,
+            comment_len: 0,
+        };
+        write_struct(&mut self.target, eocd).await?;
+
+        Ok(())
+    }
+
+    /// Completes the archive: writes one central directory header per added
+    /// entry, then the end of central directory record(s), and flushes the target.
+    ///
+    /// Returns an error if writing to or flushing the target fails.
+    pub async fn finish(&mut self) -> Result<(), Error> {
+        let central_dir_offset = self.byte_count;
+        let mut central_dir_size = 0;
+
+        for entry in self.files.iter() {
+            central_dir_size += entry.write_central_directory_header(&mut self.target).await?;
+        }
+
+        self.write_eocd(central_dir_size, central_dir_offset).await?;
+        self.target.flush().await?;
+
+        Ok(())
+    }
+}
-- 
2.20.1






More information about the pbs-devel mailing list