[pbs-devel] [PATCH proxmox 1/2] proxmox-compression: add async tar builder

Dominik Csapak d.csapak at proxmox.com
Tue Apr 12 13:04:13 CEST 2022


inspired by tar::Builder, but limited to the things we need and using
AsyncRead+AsyncWrite instead of the sync variants.

Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
---
 proxmox-compression/Cargo.toml |   1 +
 proxmox-compression/src/lib.rs |   1 +
 proxmox-compression/src/tar.rs | 172 +++++++++++++++++++++++++++++++++
 3 files changed, 174 insertions(+)
 create mode 100644 proxmox-compression/src/tar.rs

diff --git a/proxmox-compression/Cargo.toml b/proxmox-compression/Cargo.toml
index 0b9edf5..c3f7f49 100644
--- a/proxmox-compression/Cargo.toml
+++ b/proxmox-compression/Cargo.toml
@@ -17,6 +17,7 @@ flate2 = "1.0"
 futures = "0.3"
 tokio = { version = "1.6", features = [ "fs", "io-util"] }
 walkdir = "2"
+tar = "0.4"
 
 proxmox-time = { path = "../proxmox-time", version = "1" }
 proxmox-io = { path = "../proxmox-io", version = "1", features = [ "tokio" ] }
diff --git a/proxmox-compression/src/lib.rs b/proxmox-compression/src/lib.rs
index 05cf06b..e9dd113 100644
--- a/proxmox-compression/src/lib.rs
+++ b/proxmox-compression/src/lib.rs
@@ -1,4 +1,5 @@
 mod compression;
 pub use compression::*;
 
+pub mod tar;
 pub mod zip;
diff --git a/proxmox-compression/src/tar.rs b/proxmox-compression/src/tar.rs
new file mode 100644
index 0000000..59a8cc1
--- /dev/null
+++ b/proxmox-compression/src/tar.rs
@@ -0,0 +1,172 @@
+//! tar helper
+use std::io;
+use std::os::unix::ffi::OsStrExt;
+use std::path::{Component, Path, PathBuf};
+use std::str;
+
+use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
+
+use tar::{EntryType, Header};
+
+/// An async Builder for tar archives based on [tar::Builder]
+///
+/// Wraps an inner [AsyncWrite] struct to write into.
+/// Must call [finish()](Builder::finish) to write trailer + close
+/// # Example
+///
+/// ```
+/// use tar::{EntryType, Header};
+/// use proxmox_compression::tar::Builder;
+///
+/// # async fn foo() {
+/// let mut tar = Builder::new(Vec::new());
+///
+/// // Add file
+/// let mut header = Header::new_gnu();
+/// let mut data: &[u8] = &[1, 2, 3];
+/// header.set_size(data.len() as u64);
+/// tar.add_entry(&mut header, "foo", data).await.unwrap();
+///
+/// // Add symlink
+/// let mut header = Header::new_gnu();
+/// header.set_entry_type(EntryType::Symlink);
+/// tar.add_link(&mut header, "bar", "foo").await.unwrap();
+///
+/// // must call finish at the end
+/// let data = tar.finish().await.unwrap();
+/// # }
+/// ```
+pub struct Builder<W: AsyncWrite + Unpin> {
+    inner: W,
+}
+
+impl<W: AsyncWrite + Unpin> Builder<W> {
+    /// Takes an AsyncWriter as target
+    pub fn new(inner: W) -> Builder<W> {
+        Builder {
+            inner,
+        }
+    }
+
+    async fn add<R: AsyncRead + Unpin>(
+        &mut self,
+        header: &Header,
+        mut data: R,
+    ) -> io::Result<()> {
+        append_data(&mut self.inner, header, &mut data).await
+    }
+
+    /// Adds a new entry to this archive with the specified path.
+    pub async fn add_entry<P: AsRef<Path>, R: AsyncRead + Unpin>(
+        &mut self,
+        header: &mut Header,
+        path: P,
+        data: R,
+    ) -> io::Result<()> {
+        append_path_header(&mut self.inner, header, path.as_ref()).await?;
+        header.set_cksum();
+        self.add(&header, data).await
+    }
+
+    /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
+    pub async fn add_link<P: AsRef<Path>, T: AsRef<Path>>(
+        &mut self,
+        header: &mut Header,
+        path: P,
+        target: T,
+    ) -> io::Result<()> {
+        append_path_header(&mut self.inner, header, path.as_ref()).await?;
+
+        // try to set the linkame, fallback to gnu extension header otherwise
+        if let Err(err) = header.set_link_name(target.as_ref()) {
+            let link_name = target.as_ref().as_os_str().as_bytes();
+            if link_name.len() < header.as_old().linkname.len() {
+                return Err(err);
+            }
+            // add trailing '\0'
+            let mut ext_data = link_name.chain(tokio::io::repeat(0).take(1));
+            let extension = get_gnu_header(link_name.len() as u64 + 1, EntryType::GNULongLink);
+            append_data(&mut self.inner, &extension, &mut ext_data).await?;
+        }
+        header.set_cksum();
+        self.add(&header, tokio::io::empty()).await
+    }
+
+    /// Finish the archive and flush the underlying writer
+    ///
+    /// Consumes the Builder. This must be called when finishing the archive.
+    /// Flushes the inner writer and returns it.
+    pub async fn finish(mut self) -> io::Result<W> {
+        self.inner.write_all(&[0; 1024]).await?;
+        self.inner.flush().await?;
+        Ok(self.inner)
+    }
+}
+
+async fn append_data<W: AsyncWrite + Unpin, R: AsyncRead + Unpin>(
+    mut dst: &mut W,
+    header: &Header,
+    mut data: &mut R,
+) -> io::Result<()> {
+    dst.write_all(header.as_bytes()).await?;
+    let len = tokio::io::copy(&mut data, &mut dst).await?;
+
+    // Pad with zeros if necessary.
+    let buf = [0; 512];
+    let remaining = 512 - (len % 512);
+    if remaining < 512 {
+        dst.write_all(&buf[..remaining as usize]).await?;
+    }
+
+    Ok(())
+}
+
+fn get_gnu_header(size: u64, entry_type: EntryType) -> Header {
+    let mut header = Header::new_gnu();
+    let name = b"././@LongLink";
+    header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
+    header.set_mode(0o644);
+    header.set_uid(0);
+    header.set_gid(0);
+    header.set_mtime(0);
+    header.set_size(size);
+    header.set_entry_type(entry_type);
+    header.set_cksum();
+    header
+}
+
+// tries to set the path in header, or add a gnu header with 'LongName'
+async fn append_path_header<W: AsyncWrite + Unpin>(
+    dst: &mut W,
+    header: &mut Header,
+    path: &Path,
+) -> io::Result<()> {
+    let mut relpath = PathBuf::new();
+    let components = path.components();
+    for comp in components {
+        if Component::RootDir == comp {
+            continue;
+        }
+        relpath.push(comp);
+    }
+    // try to set the path directly, fallback to gnu extension header otherwise
+    if let Err(err) = header.set_path(&relpath) {
+        let data = relpath.as_os_str().as_bytes();
+        let max = header.as_old().name.len();
+        if data.len() < max {
+            return Err(err);
+        }
+        // add trailing '\0'
+        let mut ext_data = data.chain(tokio::io::repeat(0).take(1));
+        let extension = get_gnu_header(data.len() as u64 + 1, EntryType::GNULongName);
+        append_data(dst, &extension, &mut ext_data).await?;
+
+        // add the path as far as we can
+        let truncated = match str::from_utf8(&data[..max]) {
+            Ok(truncated) => truncated,
+            Err(err) => str::from_utf8(&data[..err.valid_up_to()]).unwrap(),
+        };
+        header.set_path(truncated)?;
+    }
+    Ok(())
+}
-- 
2.30.2






More information about the pbs-devel mailing list