[pbs-devel] [PATCH proxmox 5/5] s3-client: add retry logic for transient client errors

Christian Ebner c.ebner at proxmox.com
Mon Aug 25 12:32:48 CEST 2025


Implement retry logic with a linearly increasing backoff time for
transient client errors.

For this, clone the request by destructuring it and efficiently
cloning its body, leveraging Bytes::clone(). Retry up to 3 times,
adding a linearly increasing backoff time as multiples of 100 ms
for each retry, with the intention to reduce network congestion
and remote system overload [0].

[0] https://aws.amazon.com/builders-library/timeouts-retries-and-backoff-with-jitter/

Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
 proxmox-s3-client/src/client.rs | 60 ++++++++++++++++++++++++---------
 1 file changed, 45 insertions(+), 15 deletions(-)

diff --git a/proxmox-s3-client/src/client.rs b/proxmox-s3-client/src/client.rs
index 559990a1..3c9ba009 100644
--- a/proxmox-s3-client/src/client.rs
+++ b/proxmox-s3-client/src/client.rs
@@ -39,6 +39,8 @@ const S3_TCP_KEEPALIVE_TIME: u32 = 30 * 60;
 const MAX_S3_UPLOAD_RETRY: usize = 3;
 // Assumed minimum upload rate of 1 KiB/s for dynamic put object request timeout calculation.
 const S3_MIN_ASSUMED_UPLOAD_RATE: u64 = 1024;
+const MAX_S3_HTTP_REQUEST_RETRY: usize = 3;
+const S3_HTTP_REQUEST_RETRY_BACKOFF_DEFAULT: Duration = Duration::from_millis(100);
 
 /// S3 object key path prefix without the context prefix as defined by the client options.
 ///
@@ -293,23 +295,51 @@ impl S3Client {
         timeout: Option<Duration>,
     ) -> Result<Response<Incoming>, Error> {
         let request = self.prepare(request).await?;
-        if request.method() == Method::PUT {
-            if let Some(limiter) = &self.put_rate_limiter {
-                let sleep = {
-                    let mut limiter = limiter.lock().unwrap();
-                    limiter.register_traffic(Instant::now(), 1)
-                };
-                tokio::time::sleep(sleep).await;
+
+        let (parts, body) = request.into_parts();
+        let body_bytes = body
+            .bytes()
+            .ok_or_else(|| format_err!("cannot prepare request with streaming body"))?;
+
+        for retry in 0..MAX_S3_HTTP_REQUEST_RETRY {
+            let request = Request::from_parts(parts.clone(), Body::from(body_bytes.clone()));
+            if parts.method == Method::PUT {
+                if let Some(limiter) = &self.put_rate_limiter {
+                    let sleep = {
+                        let mut limiter = limiter.lock().unwrap();
+                        limiter.register_traffic(Instant::now(), 1)
+                    };
+                    tokio::time::sleep(sleep).await;
+                }
+            }
+
+            if retry > 0 {
+                let backoff_millis = S3_HTTP_REQUEST_RETRY_BACKOFF_DEFAULT * 2 * retry as u32;
+                tokio::time::sleep(backoff_millis).await;
+            }
+
+            let response = if let Some(timeout) = timeout {
+                tokio::time::timeout(timeout, self.client.request(request)).await
+            } else {
+                Ok(self.client.request(request).await)
+            };
+
+            match response {
+                Ok(Ok(response)) => return Ok(response),
+                Ok(Err(err)) => {
+                    if retry >= MAX_S3_HTTP_REQUEST_RETRY - 1 {
+                        return Err(err.into());
+                    }
+                }
+                Err(_elapsed) => {
+                    if retry >= MAX_S3_HTTP_REQUEST_RETRY - 1 {
+                        bail!("request timed out exceeding retries");
+                    }
+                }
             }
         }
-        let response = if let Some(timeout) = timeout {
-            tokio::time::timeout(timeout, self.client.request(request))
-                .await
-                .context("request timeout")??
-        } else {
-            self.client.request(request).await?
-        };
-        Ok(response)
+
+        bail!("failed to send request exceeding retries");
     }
 
     /// Check if bucket exists and got permissions to access it.
-- 
2.47.2





More information about the pbs-devel mailing list