[pbs-devel] [PATCH proxmox-backup 4/9] backup: add AsyncRead/Seek to CachedChunkReader

Wolfgang Bumiller w.bumiller at proxmox.com
Fri Jun 4 14:30:28 CEST 2021


On Wed, Jun 02, 2021 at 04:38:28PM +0200, Stefan Reiter wrote:
> Implemented as a separate struct SeekableCachedChunkReader that contains
> the original as an Arc, since the read_at future captures the
> CachedChunkReader, which would otherwise not work with the lifetimes
> required by AsyncRead. This is also the reason we cannot use a shared
> read buffer and have to allocate a new one for every read. It also means
> that the struct items required for AsyncRead/Seek do not need to be
> included in a regular CachedChunkReader.
> 
> This is intended as a replacement for AsyncIndexReader, so we have less
> code duplication and can utilize the LRU cache there too (even though
> actual request concurrency is not supported in these traits).
> 
> Signed-off-by: Stefan Reiter <s.reiter at proxmox.com>
> ---
>  src/backup/cached_chunk_reader.rs | 116 +++++++++++++++++++++++++++++-
>  1 file changed, 114 insertions(+), 2 deletions(-)
> 
> diff --git a/src/backup/cached_chunk_reader.rs b/src/backup/cached_chunk_reader.rs
> index fd5a049f..9b56fd14 100644
> --- a/src/backup/cached_chunk_reader.rs
> +++ b/src/backup/cached_chunk_reader.rs
> @@ -1,12 +1,19 @@
>  //! An async and concurrency safe data reader backed by a local LRU cache.
>  
>  use anyhow::Error;
> +use futures::future::Future;
> +use futures::ready;
> +use tokio::io::{AsyncRead, AsyncSeek, ReadBuf};
>  
> -use std::future::Future;
> +use std::io::SeekFrom;
> +use std::pin::Pin;
>  use std::sync::Arc;
> +use std::task::{Context, Poll};
>  
> -use crate::backup::{AsyncReadChunk, IndexFile};
> +use super::{AsyncReadChunk, IndexFile};
>  use crate::tools::async_lru_cache::{AsyncCacher, AsyncLruCache};
> +use proxmox::io_format_err;
> +use proxmox::sys::error::io_err_other;
>  
>  struct AsyncChunkCacher<T> {
>      reader: Arc<T>,
> @@ -85,3 +92,108 @@ impl<I: IndexFile, R: AsyncReadChunk + Send + Sync + 'static> CachedChunkReader<
>          Ok(read)
>      }
>  }
> +
> +impl<I: IndexFile + Send + Sync + 'static, R: AsyncReadChunk + Send + Sync + 'static>
> +    CachedChunkReader<I, R>
> +{
> +    /// Returns a SeekableCachedChunkReader based on this instance, which implements AsyncSeek and
> +    /// AsyncRead for use in interfaces which require that. Direct use of read_at is preferred
> +    /// otherwise.
> +    pub fn seekable(self) -> SeekableCachedChunkReader<I, R> {
> +        SeekableCachedChunkReader {
> +            index_bytes: self.index.index_bytes(),
> +            reader: Arc::new(self),
> +            position: 0,
> +            seek_to_pos: 0,
> +            read_future: None,
> +        }
> +    }
> +}
> +
> +pub struct SeekableCachedChunkReader<
> +    I: IndexFile + Send + Sync + 'static,
> +    R: AsyncReadChunk + Send + Sync + 'static,
> +> {
> +    reader: Arc<CachedChunkReader<I, R>>,
> +    index_bytes: u64,
> +    position: u64,
> +    seek_to_pos: i64,
> +    read_future: Option<Pin<Box<dyn Future<Output = Result<(Vec<u8>, usize), Error>> + Send>>>,
> +}
> +
> +impl<I, R> AsyncSeek for SeekableCachedChunkReader<I, R>
> +where
> +    I: IndexFile + Send + Sync + 'static,
> +    R: AsyncReadChunk + Send + Sync + 'static,
> +{
> +    fn start_seek(self: Pin<&mut Self>, pos: SeekFrom) -> tokio::io::Result<()> {
> +        let this = Pin::get_mut(self);
> +        this.seek_to_pos = match pos {
> +            SeekFrom::Start(offset) => offset as i64,
> +            SeekFrom::End(offset) => this.index_bytes as i64 + offset,
> +            SeekFrom::Current(offset) => this.position as i64 + offset,
> +        };
> +        Ok(())
> +    }
> +
> +    fn poll_complete(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<tokio::io::Result<u64>> {
> +        let this = Pin::get_mut(self);
> +
> +        let index_bytes = this.index_bytes;
> +        if this.seek_to_pos < 0 {
> +            return Poll::Ready(Err(io_format_err!("cannot seek to negative values")));

IMO we should ditch `seek_to_pos` altogether, move the error handling
into `start_seek`, and have `poll_complete` just always return
`Poll::Ready(Ok(this.position))` straight away.
Unless there's a reason to split this up? Other resources don't
guarantee much of anything if you start reading/writing *between*
`start_seek` and `poll_complete`, after all.

> +        } else if this.seek_to_pos > index_bytes as i64 {
> +            this.position = index_bytes;
> +        } else {
> +            this.position = this.seek_to_pos as u64;
> +        }
> +
> +        Poll::Ready(Ok(this.position))
> +    }
> +}
> +
> +impl<I, R> AsyncRead for SeekableCachedChunkReader<I, R>
> +where
> +    I: IndexFile + Send + Sync + 'static,
> +    R: AsyncReadChunk + Send + Sync + 'static,
> +{
> +    fn poll_read(
> +        self: Pin<&mut Self>,
> +        cx: &mut Context,
> +        buf: &mut ReadBuf,
> +    ) -> Poll<tokio::io::Result<()>> {
> +        let this = Pin::get_mut(self);
> +
> +        let fut = match this.read_future {
> +            Some(ref mut fut) => fut,
> +            None => {
> +                let offset = this.position;
> +                let wanted = buf.capacity();
> +                let reader = Arc::clone(&this.reader);
> +                let fut = Box::pin(async move {
> +                    let mut read_buf = vec![0u8; wanted];
> +                    let read = reader.read_at(&mut read_buf[..wanted], offset).await?;
> +                    Ok((read_buf, read))
> +                });
> +                this.read_future = Some(fut);
> +                this.read_future.as_mut().unwrap()
> +            }
> +        };

Your `None` case seems trivial enough that you could use the Option's
`.get_or_insert_with()` instead of a match with `ref mut` and `.as_mut().unwrap()`
(since the `None` case has no error paths).

> +
> +        let ret = match ready!(fut.as_mut().poll(cx)) {
> +            Ok((read_buf, read)) => {
> +                buf.put_slice(&read_buf[..read]);
> +                this.position += read as u64;
> +                Ok(())
> +            }
> +            Err(err) => {
> +                Err(io_err_other(err))
> +            }
> +        };
> +
> +        // future completed, drop
> +        let _drop = this.read_future.take();

Why not just `this.read_future = None;`?

> +
> +        Poll::Ready(ret)
> +    }
> +}
> -- 
> 2.30.2





More information about the pbs-devel mailing list