[pbs-devel] [PATCH proxmox v2 5/6] s3-client: add retry logic for transient client errors
Christian Ebner
c.ebner at proxmox.com
Mon Aug 25 15:10:05 CEST 2025
Implements a retry logic with exponentially increasing backoff time
for transient client errors.
For this, clone the requests by destructuring and efficiently
cloneing its body, leveraging Bytes::clone(). Retry up to 3 times,
adding an exponentially increasing backoff time for each retry
starting at 1 second, with the intention to reduce network congestion
and remote system overload.
Signed-off-by: Christian Ebner <c.ebner at proxmox.com>
---
proxmox-s3-client/src/client.rs | 62 +++++++++++++++++++++++++--------
1 file changed, 47 insertions(+), 15 deletions(-)
diff --git a/proxmox-s3-client/src/client.rs b/proxmox-s3-client/src/client.rs
index 64d62c54..e3845111 100644
--- a/proxmox-s3-client/src/client.rs
+++ b/proxmox-s3-client/src/client.rs
@@ -39,6 +39,8 @@ const S3_TCP_KEEPALIVE_TIME: u32 = 120;
const MAX_S3_UPLOAD_RETRY: usize = 3;
// Assumed minimum upload rate of 1 KiB/s for dynamic put object request timeout calculation.
const S3_MIN_ASSUMED_UPLOAD_RATE: u64 = 1024;
+const MAX_S3_HTTP_REQUEST_RETRY: usize = 3;
+const S3_HTTP_REQUEST_RETRY_BACKOFF_DEFAULT: Duration = Duration::from_secs(1);
/// S3 object key path prefix without the context prefix as defined by the client options.
///
@@ -293,23 +295,53 @@ impl S3Client {
timeout: Option<Duration>,
) -> Result<Response<Incoming>, Error> {
let request = self.prepare(request).await?;
- if request.method() == Method::PUT {
- if let Some(limiter) = &self.put_rate_limiter {
- let sleep = {
- let mut limiter = limiter.lock().unwrap();
- limiter.register_traffic(Instant::now(), 1)
- };
- tokio::time::sleep(sleep).await;
+
+ let (parts, body) = request.into_parts();
+ let body_bytes = body
+ .bytes()
+ .ok_or_else(|| format_err!("cannot prepare request with streaming body"))?;
+
+ let deadline = timeout.map(|timeout| tokio::time::Instant::now() + timeout);
+
+ for retry in 0..MAX_S3_HTTP_REQUEST_RETRY {
+ let request = Request::from_parts(parts.clone(), Body::from(body_bytes.clone()));
+ if parts.method == Method::PUT {
+ if let Some(limiter) = &self.put_rate_limiter {
+ let sleep = {
+ let mut limiter = limiter.lock().unwrap();
+ limiter.register_traffic(Instant::now(), 1)
+ };
+ tokio::time::sleep(sleep).await;
+ }
+ }
+
+ if retry > 0 {
+ let backoff_secs = S3_HTTP_REQUEST_RETRY_BACKOFF_DEFAULT * 3_u32.pow(retry as u32);
+ tokio::time::sleep(backoff_secs).await;
+ }
+
+ let response = if let Some(deadline) = deadline {
+ tokio::time::timeout_at(deadline, self.client.request(request)).await
+ } else {
+ Ok(self.client.request(request).await)
+ };
+
+ match response {
+ Ok(Ok(response)) => return Ok(response),
+ Ok(Err(err)) => {
+ if retry >= MAX_S3_HTTP_REQUEST_RETRY - 1 {
+ return Err(err.into());
+ }
+ }
+ Err(_elapsed) => {
+ if retry >= MAX_S3_HTTP_REQUEST_RETRY - 1 {
+ bail!("request timed out exceeding retries");
+ }
+ }
}
}
- let response = if let Some(timeout) = timeout {
- tokio::time::timeout(timeout, self.client.request(request))
- .await
- .context("request timeout")??
- } else {
- self.client.request(request).await?
- };
- Ok(response)
+
+ bail!("failed to send request exceeding retries");
}
/// Check if bucket exists and got permissions to access it.
--
2.47.2
More information about the pbs-devel
mailing list