[pbs-devel] [PATCH v4 pxar 2/3] accessor: adapt and restrict contents access

Christian Ebner c.ebner at proxmox.com
Wed Jun 12 15:17:12 CEST 2024


Add checks for split variant inputs when accessing the payload
contents via the accessor instance. Both cases, accessing via the
safe `contents` method and via the previously unsafe
`open_contents_at_range` call are covered.

Reduce possible misuse by wrapping the current plain content range
into an opaque `ContentRange` type with an additional optional
payload reference field to check consistency between the payload
reference encoded in the metadata archive and the payload header
found in the payload data archive.

Because of the additional type wrapping and the payload header check,
the `open_contents_at_range` is considered safe now, dropping the
previously unsafe implementation.
The corresponding interfaces have been adapted accordingly.

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
changes since version 3:
- move `ContentRange` to accessor
- move payload header check to `FileContentsImpl` new, make it private
- drop unsafe for `open_contents_at_range`
- refactor

 src/accessor/aio.rs  | 16 ++++++-----
 src/accessor/mod.rs  | 68 ++++++++++++++++++++++++++++++++------------
 src/accessor/sync.rs | 16 ++++++-----
 3 files changed, 68 insertions(+), 32 deletions(-)

diff --git a/src/accessor/aio.rs b/src/accessor/aio.rs
index 73b1025..eb89f8f 100644
--- a/src/accessor/aio.rs
+++ b/src/accessor/aio.rs
@@ -7,14 +7,13 @@
 use std::future::Future;
 use std::io;
 use std::mem;
-use std::ops::Range;
 use std::os::unix::fs::FileExt;
 use std::path::Path;
 use std::pin::Pin;
 use std::sync::Arc;
 use std::task::{Context, Poll};
 
-use crate::accessor::{self, cache::Cache, MaybeReady, ReadAt, ReadAtOperation};
+use crate::accessor::{self, cache::Cache, ContentRange, MaybeReady, ReadAt, ReadAtOperation};
 use crate::decoder::aio::Decoder;
 use crate::format::GoodbyeItem;
 use crate::util;
@@ -153,13 +152,16 @@ impl<T: Clone + ReadAt> Accessor<T> {
     ///
     /// This will provide a reader over an arbitrary range of the archive file, so unless this
     /// comes from a actual file entry data, the contents might not make much sense.
-    pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContents<T> {
-        FileContents {
-            inner: unsafe { self.inner.open_contents_at_range(range) },
+    pub async fn open_contents_at_range(
+        &self,
+        range: &ContentRange,
+    ) -> io::Result<FileContents<T>> {
+        Ok(FileContents {
+            inner: self.inner.open_contents_at_range(range).await?,
             at: 0,
             buffer: Vec::new(),
             future: None,
-        }
+        })
     }
 
     /// Following a hardlink.
@@ -235,7 +237,7 @@ impl<T: Clone + ReadAt> FileEntry<T> {
     }
 
     /// For use with unsafe accessor methods.
-    pub fn content_range(&self) -> io::Result<Option<Range<u64>>> {
+    pub fn content_range(&self) -> io::Result<Option<ContentRange>> {
         self.inner.content_range()
     }
 
diff --git a/src/accessor/mod.rs b/src/accessor/mod.rs
index 92d689d..48605eb 100644
--- a/src/accessor/mod.rs
+++ b/src/accessor/mod.rs
@@ -17,7 +17,7 @@ use endian_trait::Endian;
 
 use crate::binary_tree_array;
 use crate::decoder::{self, DecoderImpl};
-use crate::format::{self, FormatVersion, GoodbyeItem};
+use crate::format::{self, FormatVersion, GoodbyeItem, PayloadRef};
 use crate::util;
 use crate::{Entry, EntryKind, PxarVariant};
 
@@ -54,6 +54,16 @@ impl EntryRangeInfo {
     }
 }
 
+/// Stores a content range to be accessed via the `Accessor` as well as the payload reference to
+/// perform consistency checks on payload references for archives accessed via split variant input.
+#[derive(Clone)]
+pub struct ContentRange {
+    // Range of the content
+    content: Range<u64>,
+    // Optional payload ref
+    payload_ref: Option<PayloadRef>,
+}
+
 /// awaitable version of `ReadAt`.
 async fn read_at<T>(input: &T, buf: &mut [u8], offset: u64) -> io::Result<usize>
 where
@@ -335,13 +345,12 @@ impl<T: Clone + ReadAt> AccessorImpl<T> {
         })
     }
 
-    /// Allow opening arbitrary contents from a specific range.
-    pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> {
-        if let Some((payload_input, _)) = &self.input.payload() {
-            FileContentsImpl::new(payload_input.clone(), range)
-        } else {
-            FileContentsImpl::new(self.input.archive().clone(), range)
-        }
+    /// Open contents at provided range
+    pub async fn open_contents_at_range(
+        &self,
+        range: &ContentRange,
+    ) -> io::Result<FileContentsImpl<T>> {
+        FileContentsImpl::new(&self.input, range).await
     }
 
     /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will
@@ -758,7 +767,7 @@ impl<T: Clone + ReadAt> FileEntryImpl<T> {
     }
 
     /// For use with unsafe accessor methods.
-    pub fn content_range(&self) -> io::Result<Option<Range<u64>>> {
+    pub fn content_range(&self) -> io::Result<Option<ContentRange>> {
         match self.entry.kind {
             EntryKind::File { offset: None, .. } => {
                 io_bail!("cannot open file, reader provided no offset")
@@ -767,7 +776,10 @@ impl<T: Clone + ReadAt> FileEntryImpl<T> {
                 size,
                 offset: Some(offset),
                 payload_offset: None,
-            } => Ok(Some(offset..(offset + size))),
+            } => Ok(Some(ContentRange {
+                content: offset..(offset + size),
+                payload_ref: None,
+            })),
             // Payload offset beats regular offset if some
             EntryKind::File {
                 size,
@@ -775,7 +787,13 @@ impl<T: Clone + ReadAt> FileEntryImpl<T> {
                 payload_offset: Some(payload_offset),
             } => {
                 let start_offset = payload_offset + size_of::<format::Header>() as u64;
-                Ok(Some(start_offset..start_offset + size))
+                Ok(Some(ContentRange {
+                    content: start_offset..start_offset + size,
+                    payload_ref: Some(PayloadRef {
+                        offset: payload_offset,
+                        size,
+                    }),
+                }))
             }
             _ => Ok(None),
         }
@@ -785,11 +803,8 @@ impl<T: Clone + ReadAt> FileEntryImpl<T> {
         let range = self
             .content_range()?
             .ok_or_else(|| io_format_err!("not a file"))?;
-        if let Some((ref payload_input, _)) = self.input.payload() {
-            Ok(FileContentsImpl::new(payload_input.clone(), range))
-        } else {
-            Ok(FileContentsImpl::new(self.input.archive().clone(), range))
-        }
+
+        FileContentsImpl::new(&self.input, &range).await
     }
 
     #[inline]
@@ -897,8 +912,25 @@ pub(crate) struct FileContentsImpl<T> {
 }
 
 impl<T: Clone + ReadAt> FileContentsImpl<T> {
-    pub fn new(input: T, range: Range<u64>) -> Self {
-        Self { input, range }
+    async fn new(
+        input: &PxarVariant<T, (T, Range<u64>)>,
+        range: &ContentRange,
+    ) -> io::Result<Self> {
+        let (input, range) = if let Some((payload_input, payload_range)) = input.payload() {
+            if let Some(payload_ref) = &range.payload_ref {
+                let header: format::Header =
+                    read_entry_at(payload_input, payload_ref.offset).await?;
+                format::check_payload_header_and_size(&header, payload_ref.size)?;
+            }
+            if payload_range.start > range.content.start || payload_range.end < range.content.end {
+                io_bail!("out of range access for payload");
+            }
+            (payload_input.clone(), range.content.clone())
+        } else {
+            (input.archive().clone(), range.content.clone())
+        };
+
+        Ok(Self { input, range })
     }
 
     #[inline]
diff --git a/src/accessor/sync.rs b/src/accessor/sync.rs
index df2ed23..76e8c03 100644
--- a/src/accessor/sync.rs
+++ b/src/accessor/sync.rs
@@ -1,14 +1,13 @@
 //! Blocking `pxar` random access handling.
 
 use std::io;
-use std::ops::Range;
 use std::os::unix::fs::FileExt;
 use std::path::Path;
 use std::pin::Pin;
 use std::sync::Arc;
 use std::task::Context;
 
-use crate::accessor::{self, cache::Cache, MaybeReady, ReadAt, ReadAtOperation};
+use crate::accessor::{self, cache::Cache, ContentRange, MaybeReady, ReadAt, ReadAtOperation};
 use crate::decoder::Decoder;
 use crate::format::GoodbyeItem;
 use crate::util::poll_result_once;
@@ -142,11 +141,14 @@ impl<T: Clone + ReadAt> Accessor<T> {
     ///
     /// This will provide a reader over an arbitrary range of the archive file, so unless this
     /// comes from a actual file entry data, the contents might not make much sense.
-    pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContents<T> {
-        FileContents {
-            inner: unsafe { self.inner.open_contents_at_range(range) },
+    pub unsafe fn open_contents_at_range(
+        &self,
+        range: &ContentRange,
+    ) -> io::Result<FileContents<T>> {
+        Ok(FileContents {
+            inner: poll_result_once(self.inner.open_contents_at_range(range))?,
             at: 0,
-        }
+        })
     }
 
     /// Following a hardlink.
@@ -291,7 +293,7 @@ impl<T: Clone + ReadAt> FileEntry<T> {
     }
 
     /// For use with unsafe accessor methods.
-    pub fn content_range(&self) -> io::Result<Option<Range<u64>>> {
+    pub fn content_range(&self) -> io::Result<Option<ContentRange>> {
         self.inner.content_range()
     }
 
-- 
2.39.2





More information about the pbs-devel mailing list