[pbs-devel] [PATCH proxmox-backup 2/2] api2/node/termproxy: fix zombies on worker abort

Wolfgang Bumiller w.bumiller at proxmox.com
Tue Jul 28 14:16:00 CEST 2020


On Mon, Jul 27, 2020 at 08:54:49AM +0200, Dominik Csapak wrote:
> tokio's kill_on_drop sometimes leaves zombies around, especially
> when no other tokio::process::Command is spawned afterwards.
> 
> So instead of relying on the 'kill_on_drop' feature, we explicitly
> kill the child on a worker abort. To be able to do this,
> we have to use 'tokio::select' instead of 'futures::select', since
> the latter requires the future to be fused, which consumes the
> child handle, leaving us no possibility to kill it after fusing.
> (tokio::select does not need the futures to be fused, so we
> can reuse the child future again after the select.)
> 
> Signed-off-by: Dominik Csapak <d.csapak at proxmox.com>
> ---
>  src/api2/node.rs | 27 +++++++++++++++++----------
>  1 file changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/src/api2/node.rs b/src/api2/node.rs
> index e6ecc85..011717f 100644
> --- a/src/api2/node.rs
> +++ b/src/api2/node.rs
> @@ -2,10 +2,7 @@ use std::net::TcpListener;
>  use std::os::unix::io::AsRawFd;
>  
>  use anyhow::{bail, format_err, Error};
> -use futures::{
> -    future::{FutureExt, TryFutureExt},
> -    select,
> -};
> +use futures::future::{FutureExt, TryFutureExt};
>  use hyper::body::Body;
>  use hyper::http::request::Parts;
>  use hyper::upgrade::Upgraded;
> @@ -170,7 +167,6 @@ async fn termproxy(
>              let mut cmd = tokio::process::Command::new("/usr/bin/termproxy");
>  
>              cmd.args(&arguments)
> -                .kill_on_drop(true)
>                  .stdout(std::process::Stdio::piped())
>                  .stderr(std::process::Stdio::piped());
>  
> @@ -197,8 +193,9 @@ async fn termproxy(
>                  Ok::<(), Error>(())
>              };
>  
> -            select!{
> -                res = child.fuse() => {
> +            let mut needs_kill = false;
> +            let res = tokio::select!{
> +                res = &mut child => {
>                      let exit_code = res?;
>                      if !exit_code.success() {
>                          match exit_code.code() {
> @@ -208,10 +205,20 @@ async fn termproxy(
>                      }
>                      Ok(())
>                  },
> -                res = stdout_fut.fuse() => res,
> -                res = stderr_fut.fuse() => res,
> -                res = worker.abort_future().fuse() => res.map_err(Error::from),
> +                res = stdout_fut => res,
> +                res = stderr_fut => res,
> +                res = worker.abort_future() => {
> +                    needs_kill = true;
> +                    res.map_err(Error::from)
> +                }
> +            };
> +
> +            if needs_kill {

I think we should log if res is an `Err` at this point, as this would
indicate an *error* with the `abort_future`, which IMO shouldn't happen
but should also not be quietly discarded, which is what the two `?` below
would do (since they return on error).

> +                child.kill()?;
> +                child.await?;
>              }
> +
> +            res
>          },
>      )?;
>  
> -- 
> 2.20.1





More information about the pbs-devel mailing list