[pbs-devel] applied series: [PATCH proxmox-backup v3 1/3] tools: add zip module

Wolfgang Bumiller w.bumiller at proxmox.com
Wed Oct 21 10:51:39 CEST 2020


applied series with minor fixup commits

On Wed, Oct 21, 2020 at 09:29:06AM +0200, Dominik Csapak wrote:
> This module contains the 'ZipEncoder' struct, which wraps an async writer
> to create a ZIP archive on the fly.
> 
> To create a ZIP file, have a target that implements AsyncWrite,
> give it to ZipEncoder::new, add entries via 'add_entry' and
> at the end, call 'finish'
> 
> For now, this does not implement compression (it uses ZIP's STORE mode), and
> does not support empty directories or hardlinks (or any other special
> files).
> 
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
> changes from v2:
> * use ByteBuffer instead of Vec<u8> and read_buf (also increase buf size to 1M)
> 
>  src/tools.rs     |   1 +
>  src/tools/zip.rs | 518 +++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 519 insertions(+)
>  create mode 100644 src/tools/zip.rs
> 
> diff --git a/src/tools.rs b/src/tools.rs
> index 1837c0e0..5a9f020a 100644
> --- a/src/tools.rs
> +++ b/src/tools.rs
> @@ -36,6 +36,7 @@ pub mod logrotate;
>  pub mod loopdev;
>  pub mod fuse_loop;
>  pub mod socket;
> +pub mod zip;
>  
>  mod parallel_handler;
>  pub use parallel_handler::*;
> diff --git a/src/tools/zip.rs b/src/tools/zip.rs
> new file mode 100644
> index 00000000..3248239f
> --- /dev/null
> +++ b/src/tools/zip.rs
> @@ -0,0 +1,518 @@
> +//! ZIP Helper
> +//!
> +//! Provides an interface to create a ZIP file from `ZipEntry` items.
> +//! For a more detailed description of the ZIP format, see:
> +//! https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
> +
> +use std::convert::TryInto;
> +use std::ffi::OsString;
> +use std::io;
> +use std::mem::size_of;
> +use std::os::unix::ffi::OsStrExt;
> +use std::path::{Component, Path, PathBuf};
> +
> +use anyhow::{Error, Result};
> +use endian_trait::Endian;
> +use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
> +
> +use crc32fast::Hasher;
> +use proxmox::tools::time::gmtime;
> +use proxmox::tools::byte_buffer::ByteBuffer;
> +
> +// Record signatures and version fields written into the structures below.
> +
> +// signature of a `LocalFileHeader`
> +const LOCAL_FH_SIG: u32 = 0x04034B50;
> +// signature of a `LocalFileFooter` (the record written by `write_data_descriptor`)
> +const LOCAL_FF_SIG: u32 = 0x08074B50;
> +// signature of a `CentralDirectoryFileHeader`
> +const CENTRAL_DIRECTORY_FH_SIG: u32 = 0x02014B50;
> +// signature of the `EndOfCentralDir` record
> +const END_OF_CENTRAL_DIR: u32 = 0x06054B50;
> +// 0x2d = 45; presumably "version 4.5" of the format (ZIP64) - confirm against the APPNOTE
> +const VERSION_NEEDED: u16 = 0x002d;
> +// low byte as in VERSION_NEEDED; the high byte 0x03 presumably marks the host system as
> +// UNIX - confirm against the APPNOTE
> +const VERSION_MADE_BY: u16 = 0x032d;
> +
> +// signature of the `Zip64EOCDRecord`
> +const ZIP64_EOCD_RECORD: u32 = 0x06064B50;
> +// signature of the `Zip64EOCDLocator`
> +const ZIP64_EOCD_LOCATOR: u32 = 0x07064B50;
> +
> +// Converts a UNIX epoch into the MS-DOS date and time words stored in the ZIP headers.
> +//
> +// bits for date:
> +// 0-4: day of the month (1-31)
> +// 5-8: month: (1 = jan, etc.)
> +// 9-15: year offset from 1980
> +//
> +// bits for time:
> +// 0-4: second / 2
> +// 5-10: minute (0-59)
> +// 11-15: hour (0-23)
> +//
> +// Returns `(date, time)`. If `gmtime` cannot convert the epoch, `(0, 0)` is returned. If only
> +// the year is outside of the representable range (1980-2107), the date is 0 and the time is
> +// still filled in.
> +//
> +// see https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-filetimetodosdatetime
> +fn epoch_to_dos(epoch: i64) -> (u16, u16) {
> +    let gmtime = match gmtime(epoch) {
> +        Ok(gmtime) => gmtime,
> +        Err(_) => return (0, 0),
> +    };
> +
> +    let seconds = (gmtime.tm_sec / 2) & 0b11111;
> +    // 6 bit mask; this has to be a binary literal, the hex literal `0xb111111` drops minute bits
> +    let minutes = gmtime.tm_min & 0b111111;
> +    let hours = gmtime.tm_hour & 0b11111;
> +    let time: u16 = ((hours << 11) | (minutes << 5) | (seconds)) as u16;
> +
> +    // the year offset has 7 bits, so 1980 + 127 = 2107 is the last year that can be stored
> +    let date: u16 = if gmtime.tm_year > (2107 - 1900) || gmtime.tm_year < (1980 - 1900) {
> +        0
> +    } else {
> +        let day = gmtime.tm_mday & 0b11111;
> +        // tm_mon counts from 0, the DOS month from 1
> +        let month = (gmtime.tm_mon + 1) & 0b1111;
> +        let year = (gmtime.tm_year + 1900 - 1980) & 0b1111111;
> +        ((year << 9) | (month << 5) | (day)) as u16
> +    };
> +
> +    (date, time)
> +}
> +
> +/// Extra field with 64 bit sizes, written after the file name of a `LocalFileHeader`.
> +///
> +/// The struct is packed and gets written byte for byte (little endian) by `write_struct`,
> +/// so the field order is the on-disk layout.
> +#[derive(Endian)]
> +#[repr(C, packed)]
> +struct Zip64Field {
> +    /// id of the extra field, written as 0x0001
> +    field_type: u16,
> +    /// size of the data following this field, written as 2 * 8
> +    field_size: u16,
> +    uncompressed_size: u64,
> +    compressed_size: u64,
> +}
> +
> +/// Extra field with 64 bit sizes and offset, written after the file name of a
> +/// `CentralDirectoryFileHeader`.
> +///
> +/// The struct is packed and gets written byte for byte (little endian) by `write_struct`,
> +/// so the field order is the on-disk layout.
> +#[derive(Endian)]
> +#[repr(C, packed)]
> +struct Zip64FieldWithOffset {
> +    /// id of the extra field, written as 1
> +    field_type: u16,
> +    /// size of the data following this field, written as 3 * 8
> +    field_size: u16,
> +    uncompressed_size: u64,
> +    compressed_size: u64,
> +    /// offset of the local header of the entry, counted from the start of the archive
> +    offset: u64,
> +}
> +
> +/// Header written in front of the file name and content of every entry.
> +///
> +/// The struct is packed and gets written byte for byte (little endian) by `write_struct`,
> +/// so the field order is the on-disk layout.
> +#[derive(Endian)]
> +#[repr(C, packed)]
> +struct LocalFileHeader {
> +    signature: u32,
> +    version_needed: u16,
> +    /// written as `1 << 3`; presumably the "data descriptor follows" bit - confirm with APPNOTE
> +    flags: u16,
> +    /// written as 0 (no compression is implemented)
> +    compression: u16,
> +    /// DOS time as calculated by `epoch_to_dos`
> +    time: u16,
> +    /// DOS date as calculated by `epoch_to_dos`
> +    date: u16,
> +    /// written as 0, the real value is part of the `LocalFileFooter`
> +    crc32: u32,
> +    /// written as 0xFFFFFFFF, the real value is part of the `LocalFileFooter`
> +    compressed_size: u32,
> +    /// written as 0xFFFFFFFF, the real value is part of the `LocalFileFooter`
> +    uncompressed_size: u32,
> +    /// length in bytes of the file name following the header
> +    filename_len: u16,
> +    /// length in bytes of the extra field following the file name
> +    extra_field_len: u16,
> +}
> +
> +/// Record written after the content of every entry (see `write_data_descriptor`).
> +///
> +/// It carries the checksum and the sizes that are not known yet when the
> +/// `LocalFileHeader` is written.
> +///
> +/// The struct is packed and gets written byte for byte (little endian) by `write_struct`,
> +/// so the field order is the on-disk layout.
> +#[derive(Endian)]
> +#[repr(C, packed)]
> +struct LocalFileFooter {
> +    signature: u32,
> +    crc32: u32,
> +    compressed_size: u64,
> +    uncompressed_size: u64,
> +}
> +
> +/// Header of one entry in the central directory, which is written by `ZipEncoder::finish`.
> +///
> +/// The struct is packed and gets written byte for byte (little endian) by `write_struct`,
> +/// so the field order is the on-disk layout.
> +#[derive(Endian)]
> +#[repr(C, packed)]
> +struct CentralDirectoryFileHeader {
> +    signature: u32,
> +    version_made_by: u16,
> +    version_needed: u16,
> +    /// written as `1 << 3`, like in the `LocalFileHeader`
> +    flags: u16,
> +    /// written as 0 (no compression is implemented)
> +    compression: u16,
> +    /// DOS time as calculated by `epoch_to_dos`
> +    time: u16,
> +    /// DOS date as calculated by `epoch_to_dos`
> +    date: u16,
> +    crc32: u32,
> +    /// written as 0xFFFFFFFF, the real value is part of the `Zip64FieldWithOffset`
> +    compressed_size: u32,
> +    /// written as 0xFFFFFFFF, the real value is part of the `Zip64FieldWithOffset`
> +    uncompressed_size: u32,
> +    /// length in bytes of the file name following the header
> +    filename_len: u16,
> +    /// length in bytes of the extra field following the file name
> +    extra_field_len: u16,
> +    /// written as 0 (no comment gets written)
> +    comment_len: u16,
> +    /// written as 0
> +    start_disk: u16,
> +    /// written as 0
> +    internal_flags: u16,
> +    /// the mode of the entry in the upper 16 bits, bit 4 is set for directories
> +    external_flags: u32,
> +    /// written as 0xFFFFFFFF, the real value is part of the `Zip64FieldWithOffset`
> +    offset: u32,
> +}
> +
> +/// Last record of the archive, written by `ZipEncoder::write_eocd`.
> +///
> +/// If a `Zip64EOCDRecord` gets written, the record counts are set to 0xFFFF and the
> +/// directory size and offset to 0xFFFFFFFF.
> +///
> +/// The struct is packed and gets written byte for byte (little endian) by `write_struct`,
> +/// so the field order is the on-disk layout.
> +#[derive(Endian)]
> +#[repr(C, packed)]
> +struct EndOfCentralDir {
> +    signature: u32,
> +    /// written as 0
> +    disk_number: u16,
> +    /// written as 0
> +    start_disk: u16,
> +    /// number of entries; the same value is written to both record counts
> +    disk_record_count: u16,
> +    total_record_count: u16,
> +    /// size in bytes of all central directory headers
> +    directory_size: u32,
> +    /// offset of the first central directory header, counted from the start of the archive
> +    directory_offset: u32,
> +    /// written as 0 (no comment gets written)
> +    comment_len: u16,
> +}
> +
> +/// 64 bit variant of the `EndOfCentralDir` record.
> +///
> +/// Only written by `ZipEncoder::write_eocd` if the entry count, the size or the offset of
> +/// the central directory do not fit into the `EndOfCentralDir` record.
> +///
> +/// The struct is packed and gets written byte for byte (little endian) by `write_struct`,
> +/// so the field order is the on-disk layout.
> +#[derive(Endian)]
> +#[repr(C, packed)]
> +struct Zip64EOCDRecord {
> +    signature: u32,
> +    /// written as 44, which is the size of this struct without `signature` and `field_size`
> +    field_size: u64,
> +    version_made_by: u16,
> +    version_needed: u16,
> +    /// written as 0
> +    disk_number: u32,
> +    /// written as 0
> +    disk_number_central_dir: u32,
> +    /// number of entries; the same value is written to both record counts
> +    disk_record_count: u64,
> +    total_record_count: u64,
> +    /// size in bytes of all central directory headers
> +    directory_size: u64,
> +    /// offset of the first central directory header, counted from the start of the archive
> +    directory_offset: u64,
> +}
> +
> +/// Record written directly after the `Zip64EOCDRecord` by `ZipEncoder::write_eocd`.
> +///
> +/// The struct is packed and gets written byte for byte (little endian) by `write_struct`,
> +/// so the field order is the on-disk layout.
> +#[derive(Endian)]
> +#[repr(C, packed)]
> +struct Zip64EOCDLocator {
> +    signature: u32,
> +    /// written as 0
> +    disk_number: u32,
> +    /// written as central directory offset + central directory size, which is the position
> +    /// where the `Zip64EOCDRecord` starts
> +    offset: u64,
> +    /// written as 1
> +    disk_count: u32,
> +}
> +
> +/// Converts `data` to little endian and writes its in-memory bytes to `output`.
> +///
> +/// The bytes of the value are written exactly as they are laid out in memory, so this is
> +/// meant for the `#[repr(C, packed)]` structs of this module.
> +async fn write_struct<E, T>(output: &mut T, data: E) -> io::Result<()>
> +where
> +    T: AsyncWrite + ?Sized + Unpin,
> +    E: Endian,
> +{
> +    let le_value = data.to_le();
> +    let start = &le_value as *const E as *const u8;
> +    let len = std::mem::size_of::<E>();
> +
> +    // SAFETY: `start` points to `le_value`, which lives until the end of this function and
> +    // is exactly `len` bytes large.
> +    let bytes = unsafe { std::slice::from_raw_parts(start, len) };
> +
> +    output.write_all(bytes).await
> +}
> +
> +/// Represents an Entry in a ZIP File
> +///
> +/// used to add to a ZipEncoder
> +///
> +/// `crc32`, the sizes and `offset` start out as 0 and are filled in by
> +/// `ZipEncoder::add_entry` while the entry gets written.
> +pub struct ZipEntry {
> +    /// name inside of the archive (with a trailing separator for directories)
> +    filename: OsString,
> +    /// modification time, gets converted with `epoch_to_dos` for the headers
> +    mtime: i64,
> +    /// mode, ends up in the upper 16 bits of the external flags in the central directory
> +    mode: u16,
> +    /// checksum of the content
> +    crc32: u32,
> +    uncompressed_size: u64,
> +    /// always the same as `uncompressed_size`, since no compression is implemented
> +    compressed_size: u64,
> +    /// offset of the local header, counted from the start of the archive
> +    offset: u64,
> +    /// false for directories
> +    is_file: bool,
> +}
> +
> +impl ZipEntry {
> +    /// Creates a new ZipEntry
> +    ///
> +    /// Only the `Normal` components of `path` are used, so a root, a prefix, `.` and `..`
> +    /// never end up in the name that is stored in the archive.
> +    ///
> +    /// if is_file is false the path will contain a trailing separator,
> +    /// so that the zip file understands that it is a directory
> +    ///
> +    /// `mtime` gets converted with `epoch_to_dos` when the headers are written and `mode`
> +    /// ends up in the upper 16 bits of the external flags of the central directory header.
> +    pub fn new<P: AsRef<Path>>(path: P, mtime: i64, mode: u16, is_file: bool) -> Self {
> +        let mut relpath = PathBuf::new();
> +
> +        for comp in path.as_ref().components() {
> +            if let Component::Normal(_) = comp {
> +                relpath.push(comp);
> +            }
> +        }
> +
> +        if !is_file {
> +            relpath.push(""); // adds trailing slash
> +        }
> +
> +        Self {
> +            filename: relpath.into(),
> +            crc32: 0,
> +            mtime,
> +            mode,
> +            uncompressed_size: 0,
> +            compressed_size: 0,
> +            offset: 0,
> +            is_file,
> +        }
> +    }
> +
> +    /// Returns the length of the file name in bytes as it is stored in the headers.
> +    ///
> +    /// The headers only have 16 bits for the length, so a longer name is rejected with an
> +    /// `InvalidInput` error. Truncating the length would write a header that does not match
> +    /// the file name following it.
> +    fn checked_filename_len(&self) -> io::Result<u16> {
> +        let len: u16 = self.filename.as_bytes().len().try_into().map_err(|_| {
> +            io::Error::new(
> +                io::ErrorKind::InvalidInput,
> +                "filename too long for a ZIP entry",
> +            )
> +        })?;
> +
> +        Ok(len)
> +    }
> +
> +    /// Writes the local file header, the file name and the ZIP64 extra field to `buf`.
> +    ///
> +    /// The checksum and the sizes are not known at this point, so only placeholders are
> +    /// written here; the real values follow in `write_data_descriptor`.
> +    ///
> +    /// Returns the number of bytes written. Fails if the file name is longer than
> +    /// `u16::MAX` bytes or if writing to `buf` fails.
> +    async fn write_local_header<W>(&self, buf: &mut W) -> io::Result<usize>
> +    where
> +        W: AsyncWrite + Unpin + ?Sized,
> +    {
> +        let filename = self.filename.as_bytes();
> +        let filename_len = self.checked_filename_len()?;
> +        let zip_field_size = size_of::<Zip64Field>();
> +        let size: usize = size_of::<LocalFileHeader>() + filename.len() + zip_field_size;
> +
> +        let (date, time) = epoch_to_dos(self.mtime);
> +
> +        write_struct(
> +            &mut *buf,
> +            LocalFileHeader {
> +                signature: LOCAL_FH_SIG,
> +                version_needed: VERSION_NEEDED,
> +                flags: 1 << 3,
> +                compression: 0,
> +                time,
> +                date,
> +                crc32: 0,
> +                compressed_size: 0xFFFFFFFF,
> +                uncompressed_size: 0xFFFFFFFF,
> +                filename_len,
> +                // the size of the struct is a small constant, so this cannot truncate
> +                extra_field_len: zip_field_size as u16,
> +            },
> +        )
> +        .await?;
> +
> +        buf.write_all(filename).await?;
> +
> +        write_struct(
> +            &mut *buf,
> +            Zip64Field {
> +                field_type: 0x0001,
> +                field_size: 2 * 8,
> +                uncompressed_size: 0,
> +                compressed_size: 0,
> +            },
> +        )
> +        .await?;
> +
> +        Ok(size)
> +    }
> +
> +    /// Writes the record with the checksum and the sizes of the entry to `buf`.
> +    ///
> +    /// This has to be called after the content was written and `crc32`, `compressed_size`
> +    /// and `uncompressed_size` were set.
> +    ///
> +    /// Returns the number of bytes written.
> +    async fn write_data_descriptor<W: AsyncWrite + Unpin + ?Sized>(
> +        &self,
> +        buf: &mut W,
> +    ) -> io::Result<usize> {
> +        let size = size_of::<LocalFileFooter>();
> +
> +        write_struct(
> +            &mut *buf,
> +            LocalFileFooter {
> +                signature: LOCAL_FF_SIG,
> +                crc32: self.crc32,
> +                compressed_size: self.compressed_size,
> +                uncompressed_size: self.uncompressed_size,
> +            },
> +        )
> +        .await?;
> +
> +        Ok(size)
> +    }
> +
> +    /// Writes the central directory header, the file name and the ZIP64 extra field of the
> +    /// entry to `buf`.
> +    ///
> +    /// The 32 bit sizes and the 32 bit offset are always set to 0xFFFFFFFF, the real values
> +    /// are written to the ZIP64 extra field.
> +    ///
> +    /// Returns the number of bytes written. Fails if the file name is longer than
> +    /// `u16::MAX` bytes or if writing to `buf` fails.
> +    async fn write_central_directory_header<W: AsyncWrite + Unpin + ?Sized>(
> +        &self,
> +        buf: &mut W,
> +    ) -> io::Result<usize> {
> +        let filename = self.filename.as_bytes();
> +        let filename_len = self.checked_filename_len()?;
> +        let zip_field_size = size_of::<Zip64FieldWithOffset>();
> +        let size: usize =
> +            size_of::<CentralDirectoryFileHeader>() + filename.len() + zip_field_size;
> +
> +        let (date, time) = epoch_to_dos(self.mtime);
> +
> +        write_struct(
> +            &mut *buf,
> +            CentralDirectoryFileHeader {
> +                signature: CENTRAL_DIRECTORY_FH_SIG,
> +                version_made_by: VERSION_MADE_BY,
> +                version_needed: VERSION_NEEDED,
> +                flags: 1 << 3,
> +                compression: 0,
> +                time,
> +                date,
> +                crc32: self.crc32,
> +                compressed_size: 0xFFFFFFFF,
> +                uncompressed_size: 0xFFFFFFFF,
> +                filename_len,
> +                // the size of the struct is a small constant, so this cannot truncate
> +                extra_field_len: zip_field_size as u16,
> +                comment_len: 0,
> +                start_disk: 0,
> +                internal_flags: 0,
> +                // mode in the upper 16 bits, bit 4 is set for directories
> +                external_flags: (self.mode as u32) << 16 | (!self.is_file as u32) << 4,
> +                offset: 0xFFFFFFFF,
> +            },
> +        )
> +        .await?;
> +
> +        buf.write_all(filename).await?;
> +
> +        write_struct(
> +            &mut *buf,
> +            Zip64FieldWithOffset {
> +                field_type: 1,
> +                field_size: 3 * 8,
> +                uncompressed_size: self.uncompressed_size,
> +                compressed_size: self.compressed_size,
> +                offset: self.offset,
> +            },
> +        )
> +        .await?;
> +
> +        Ok(size)
> +    }
> +}
> +
> +/// Wraps a writer that implements AsyncWrite for creating a ZIP archive
> +///
> +/// This will create a ZIP archive on the fly with files added with
> +/// 'add_entry'. To finish the file, call 'finish'.
> +///
> +/// Example:
> +/// ```no_run
> +/// use proxmox_backup::tools::zip::*;
> +/// use tokio::fs::File;
> +///
> +/// #[tokio::main]
> +/// async fn main() -> Result<(), anyhow::Error> {
> +///     let target = File::create("foo.zip").await?;
> +///     let source = File::open("foo.txt").await?;
> +///
> +///     let mut zip = ZipEncoder::new(target);
> +///     zip.add_entry(
> +///         ZipEntry::new("foo.txt", 0, 0o100755, true),
> +///         Some(source),
> +///     ).await?;
> +///
> +///     zip.finish().await?;
> +///
> +///     Ok(())
> +/// }
> +/// ```
> +pub struct ZipEncoder<W>
> +where
> +    W: AsyncWrite + Unpin,
> +{
> +    // number of bytes written to `target` by `add_entry` so far
> +    byte_count: usize,
> +    // entries added so far, needed for writing the central directory in `finish`
> +    files: Vec<ZipEntry>,
> +    // the writer that receives the archive
> +    target: W,
> +    // buffer for copying the content of the entries to `target`
> +    buf: ByteBuffer,
> +}
> +
> +impl<W: AsyncWrite + Unpin> ZipEncoder<W> {
> +    /// Creates a new encoder that writes the archive to `target`.
> +    ///
> +    /// This allocates a buffer of 1 MiB that is used for copying the content of the entries.
> +    pub fn new(target: W) -> Self {
> +        Self {
> +            byte_count: 0,
> +            files: Vec::new(),
> +            target,
> +            buf: ByteBuffer::with_capacity(1024*1024),
> +        }
> +    }
> +
> +    /// Adds `entry` to the archive.
> +    ///
> +    /// Writes the local header, then copies `content` (if there is any) to the target while
> +    /// calculating its size and checksum, and writes both afterwards. The entry is
> +    /// remembered for the central directory, which gets written by `finish`.
> +    ///
> +    /// Fails if reading from `content` or writing to the target fails, or if the file name
> +    /// of the entry is too long.
> +    pub async fn add_entry<R: AsyncRead + Unpin>(
> +        &mut self,
> +        mut entry: ZipEntry,
> +        content: Option<R>,
> +    ) -> Result<(), Error> {
> +        entry.offset = self.byte_count.try_into()?;
> +        self.byte_count += entry.write_local_header(&mut self.target).await?;
> +        if let Some(mut content) = content {
> +            let mut hasher = Hasher::new();
> +            let mut size = 0;
> +            loop {
> +                let count = self.buf.read_from_async(&mut content).await?;
> +
> +                // end of file
> +                if count == 0 {
> +                    break;
> +                }
> +
> +                size += count;
> +                // the buffer is emptied at the end of every iteration, so it only contains
> +                // the `count` bytes that were just read
> +                hasher.update(&self.buf);
> +                self.target.write_all(&self.buf).await?;
> +                self.buf.consume(count);
> +            }
> +
> +            self.byte_count += size;
> +            // no compression is implemented, so both sizes are the same
> +            entry.compressed_size = size.try_into()?;
> +            entry.uncompressed_size = size.try_into()?;
> +            entry.crc32 = hasher.finalize();
> +        }
> +        self.byte_count += entry.write_data_descriptor(&mut self.target).await?;
> +
> +        self.files.push(entry);
> +
> +        Ok(())
> +    }
> +
> +    /// Writes the end of central directory record.
> +    ///
> +    /// `central_dir_size` is the size of all central directory headers in bytes and
> +    /// `central_dir_offset` the position of the first one in the archive.
> +    ///
> +    /// If the entry count, the size or the offset do not fit into the record, the ZIP64
> +    /// record and its locator are written in front of it and the fields of the record are set
> +    /// to 0xFFFF or 0xFFFFFFFF. The same is done for values that are equal to these
> +    /// markers, since a reader could not tell them apart from the markers otherwise.
> +    async fn write_eocd(
> +        &mut self,
> +        central_dir_size: usize,
> +        central_dir_offset: usize,
> +    ) -> Result<(), Error> {
> +        let entrycount = self.files.len();
> +
> +        let needs_zip64 = central_dir_size >= u32::MAX as usize
> +            || central_dir_offset >= u32::MAX as usize
> +            || entrycount >= u16::MAX as usize;
> +
> +        let (count, directory_size, directory_offset): (u16, u32, u32) = if needs_zip64 {
> +            write_struct(
> +                &mut self.target,
> +                Zip64EOCDRecord {
> +                    signature: ZIP64_EOCD_RECORD,
> +                    field_size: 44,
> +                    version_made_by: VERSION_MADE_BY,
> +                    version_needed: VERSION_NEEDED,
> +                    disk_number: 0,
> +                    disk_number_central_dir: 0,
> +                    disk_record_count: entrycount.try_into()?,
> +                    total_record_count: entrycount.try_into()?,
> +                    directory_size: central_dir_size.try_into()?,
> +                    directory_offset: central_dir_offset.try_into()?,
> +                },
> +            )
> +            .await?;
> +
> +            // the ZIP64 record was written directly after the central directory
> +            let zip64_eocd_offset = central_dir_offset + central_dir_size;
> +
> +            write_struct(
> +                &mut self.target,
> +                Zip64EOCDLocator {
> +                    signature: ZIP64_EOCD_LOCATOR,
> +                    disk_number: 0,
> +                    offset: zip64_eocd_offset.try_into()?,
> +                    disk_count: 1,
> +                },
> +            )
> +            .await?;
> +
> +            (0xFFFF, 0xFFFFFFFF, 0xFFFFFFFF)
> +        } else {
> +            (
> +                entrycount.try_into()?,
> +                central_dir_size.try_into()?,
> +                central_dir_offset.try_into()?,
> +            )
> +        };
> +
> +        write_struct(
> +            &mut self.target,
> +            EndOfCentralDir {
> +                signature: END_OF_CENTRAL_DIR,
> +                disk_number: 0,
> +                start_disk: 0,
> +                disk_record_count: count,
> +                total_record_count: count,
> +                directory_size,
> +                directory_offset,
> +                comment_len: 0,
> +            },
> +        )
> +        .await?;
> +
> +        Ok(())
> +    }
> +
> +    /// Finishes the archive.
> +    ///
> +    /// Writes the central directory headers of all added entries and the end of central
> +    /// directory record, and flushes the target afterwards.
> +    pub async fn finish(&mut self) -> Result<(), Error> {
> +        let central_dir_offset = self.byte_count;
> +        let mut central_dir_size = 0;
> +
> +        for file in &self.files {
> +            central_dir_size += file
> +                .write_central_directory_header(&mut self.target)
> +                .await?;
> +        }
> +
> +        self.write_eocd(central_dir_size, central_dir_offset)
> +            .await?;
> +
> +        self.target.flush().await?;
> +
> +        Ok(())
> +    }
> +}
> -- 
> 2.20.1





More information about the pbs-devel mailing list