[pve-devel] [PATCH storage 1/1] fix #1710: add retrieve method for storage

Dominik Csapak d.csapak at proxmox.com
Thu Apr 29 13:54:46 CEST 2021


thanks for the patch, a few comments inline

On 4/28/21 16:13, Lorenz Stechauner wrote:
> Users are now able to download/retrieve any .iso/... file onto their
> storages and verify file integrity with checksums.
> 
> Signed-off-by: Lorenz Stechauner <l.stechauner at proxmox.com>
> ---
>   PVE/API2/Storage/Status.pm | 244 +++++++++++++++++++++++++++++++++++++
>   1 file changed, 244 insertions(+)
> 
> diff --git a/PVE/API2/Storage/Status.pm b/PVE/API2/Storage/Status.pm
> index 897b4a7..3919be7 100644
> --- a/PVE/API2/Storage/Status.pm
> +++ b/PVE/API2/Storage/Status.pm
> @@ -5,6 +5,8 @@ use warnings;
>   
>   use File::Path;
>   use File::Basename;
> +use HTTP::Request;
> +use LWP::UserAgent;
>   use PVE::Tools;
>   use PVE::INotify;
>   use PVE::Cluster;
> @@ -497,4 +499,246 @@ __PACKAGE__->register_method ({
>   	return $upid;
>      }});
>   
> +__PACKAGE__->register_method({
> +    name => 'retrieve',
> +    path => '{storage}/retrieve',
> +    method => 'POST',
> +    description => "Download templates and ISO images by using an URL.",
> +    permissions => {
> +	check => ['perm', '/storage/{storage}', ['Datastore.AllocateTemplate']],
> +    },
> +    protected => 1,
> +    parameters => {
> +	additionalProperties => 0,
> +	properties => {
> +	    node => get_standard_option('pve-node'),
> +	    storage => get_standard_option('pve-storage-id'),
> +	    url => {
> +		description => "The URL to retrieve the file from.",
> +		type => 'string',
> +	    },

i am not quite sure if it is a good idea to have this feature
unrestricted for everybody who can download a template

it possibly gives access to an internal network to which
the users does not have access otherwise...

maybe we want to give the admin control over allow- and/or blocklists ?


> +	    content => {
> +		description => "Content type.",
> +		type => 'string', format => 'pve-storage-content',
> +	    },
> +	    filename => {
> +		description => "The name of the file to create. Alternatively the file name given by the server will be used.",
> +		type => 'string',
> +		optional => 1,
> +	    },
> +	    checksum => {
> +		description => "The expected checksum of the file.",
> +		type => 'string',
> +		optional => 1,
> +	    },
> +	    checksumalg => {
> +		description => "The algorithm to claculate the checksum of the file.",
> +		type => 'string',
> +		optional => 1,
> +	    },

we can define enums in the api like so:

type => 'string',
enum => ['val1', 'val2'],

also we can define dependencies here with
requires => 'checksumalg'
for checksum

> +	    metaonly => {
> +		description => "Only return the file name and size.",
> +		type => 'boolean',
> +		optional => 1,
> +	    },

generally i do not like this design of the api call that does two things
i'd prefer to have 2 api calls:

query_url (or smthg like this), checks the url for filename/size
retrieve, actually downloads the file

> +	    insecure => {
> +		description => "Allow TLS certificates to be invalid.",
> +		type => 'boolean',
> +		optional => 1,
> +	    } > +	},
> +    },
> +    returns => {
> +	type => "object",
> +	properties => {
> +	    filename => { type => 'string' },
> +	    upid => { type => 'string' },
> +	    size => {
> +		type => 'integer',
> +		renderer => 'bytes',
> +	    },
> +	},
> +    },
> +    code => sub {
> +	my ($param) = @_;
> +
> +	my @hash_algs = ['md5', 'sha1', 'sha224', 'sha256', 'sha384', 'sha512'];

as written above, can be handled by api

> +
> +	my $rpcenv = PVE::RPCEnvironment::get();
> +
> +	my $user = $rpcenv->get_user();
> +
> +	my $cfg = PVE::Storage::config();
> +
> +	my $node = $param->{node};
> +	my $scfg = PVE::Storage::storage_check_enabled($cfg, $param->{storage}, $node);
> +
> +	die "can't upload to storage type '$scfg->{type}'"
> +	    if !defined($scfg->{path});
> +
> +	my $content = $param->{content};
> +
> +	my $url = $param->{url};
> +
> +	die "invalid https or http url"
> +	    if $url !~ qr!^https?://!;
> +
> +	my $checksum = $param->{checksum};
> +	my $hash_alg = $param->{checksumalg};
> +
> +	$hash_alg = undef
> +	    if $hash_alg eq 'none';
> +
> +	die "either 'checksum' and 'checksumalg' or none of these have to be specified"
> +	    if ($checksum && !$hash_alg) || (!$checksum && $hash_alg);\

as written above, can also be handled by api

> +
> +	die "unsupported checksumalg: '$hash_alg'"
> +	    if $hash_alg && ($hash_alg !~ /^(md5|sha1|sha(224|256|384|512))$/);

same

> +
> +	my $req = HTTP::Request->new(HEAD => $url);
> +	my $ua = LWP::UserAgent->new();
> +	my $res = $ua->request($req);
> +
> +	die "invalid server response: '" . $res->status_line() . "'"
> +	    if ($res->code() != 200);
> +
> +	my $size = $res->header("Content-Length");
> +	my $dispo = $res->header("Content-Disposition");
> +
> +	my $filename_remote;
> +
> +	if ($dispo && $dispo =~ m/filename=(.+)/) {
> +	    $filename_remote = $1;
> +	} elsif ($url =~ m!^[^?]+/([^?/]*)(?:\?.*)?$!) {
> +	    $filename_remote = $1;
> +	}
> +
> +	chomp $filename_remote;
> +	$filename_remote =~ s/^.*[\/\\]//;
> +	$filename_remote =~ s/[^-a-zA-Z0-9_.]/_/g;

since we also use this in 'upload' and below again,
it may be nice to put that in a function and reuse
it (so they cannot diverge)

> +
> +	if ($param->{metaonly}) {
> +	    return {
> +		filename => $filename_remote,
> +		upid => 0,
> +		size => $size + 0,
> +	    };
> +	}
> +
> +	my $filename = $param->{filename} || $filename_remote;

please use '//' here, since this also triggers if my filename is "0"

> +	chomp $filename;
> +	$filename =~ s/^.*[\/\\]//;
> +	$filename =~ s/[^-a-zA-Z0-9_.]/_/g;
> +
> +	my $path;
> +
> +	if ($content eq 'iso') {
> +	    if ($filename !~ m![^/]+$PVE::Storage::iso_extension_re$!) {
> +		raise_param_exc({ filename => "missing '.iso' or '.img' extension" });
> +	    }
> +	    $path = PVE::Storage::get_iso_dir($cfg, $param->{storage});
> +	} elsif ($content eq 'vztmpl') {
> +	    if ($filename !~ m![^/]+\.tar\.[gx]z$!) {
> +		raise_param_exc({ filename => "missing '.tar.gz' or '.tar.xz' extension" });
> +	    }
> +	    $path = PVE::Storage::get_vztmpl_dir($cfg, $param->{storage});
> +	} else {
> +	    raise_param_exc({ content => "upload content type '$content' not allowed" });
> +	}

i'd put this check (the 'not allowed' one) above, before doing any request

> +
> +	die "storage '$param->{storage}' does not support '$content' content"
> +	    if !$scfg->{content}->{$content};

as well

> +
> +	my $dest = "$path/$filename";
> +	my $dirname = dirname($dest);

should that not be simply '$path' ?
or do we allow subdirectories?
(afair, those would not appear in the content of the storage)

> +
> +	my $cmd;
> +	my $cmd_check;
> +	my $cmd_check_flat;
> +	my $cmd_del;
> +
> +	my @curlcmd = ('curl', '-L', '-o', $dest, '-f');
> +	push @curlcmd, '--insecure'
> +	    if $param->{insecure};
> +
> +	my @check1 = ('echo', $checksum, $dest);
> +	my @check2 = ("${hash_alg}sum", '-c');
> +
> +	my @unlinkcmd = ('rm', '-f', $dest);
> +
> +	if ($node ne 'localhost' && $node ne PVE::INotify::nodename()) {

this should not be necessary here, we can define

proxyto => '{node}',
in the api definition, then
the pveproxy automatically proxies the request to the correct node and
it will be executed thhere

> +	    my $remip = PVE::Cluster::remote_node_ip($node);
> +
> +	    my @ssh_options = ('-o', 'BatchMode=yes');
> +
> +	    my @remcmd = ('/usr/bin/ssh', @ssh_options, $remip, '--');
> +
> +	    eval {
> +		# activate remote storage
> +		PVE::Tools::run_command([@remcmd, '/usr/sbin/pvesm', 'status', '--storage', $param->{storage}]);
> +	    };
> +	    die "can't activate storage '$param->{storage}' on node '$node': $@"
> +		if $@;
> +
> +	    PVE::Tools::run_command([@remcmd, '/bin/mkdir', '-p', '--', $dirname],
> +				    errmsg => "mkdir failed");
> +
> +	    $cmd = [@remcmd, @curlcmd, PVE::Tools::shell_quote($url)];
> +
> +	    $cmd_check = [@remcmd, @check1, '|', @check2];
> +	    $cmd_check_flat = [@remcmd, @check1, '|', @check2];
> +
> +	    $cmd_del = [@remcmd, @unlinkcmd];
> +	} else {
> +	    PVE::Storage::activate_storage($cfg, $param->{storage});
> +	    File::Path::make_path($dirname);
> +
> +	    $cmd = [@curlcmd, $url];
> +
> +	    $cmd_check = [[@check1], [@check2]];
> +	    $cmd_check_flat = [@check1, '|', @check2];
> +
> +	    $cmd_del = [@unlinkcmd];
> +	}
> +
> +	my $worker = sub {
> +	    my $upid = shift;
> +
> +	    print "starting file download from: $url\n";
> +	    print "target node: $node\n";
> +	    print "target file: $dest\n";
> +	    print "file size is: $size\n";
> +	    print "command: " . join(' ', @$cmd) . "\n";
> +
> +	    eval { PVE::Tools::run_command($cmd, errmsg => 'download failed'); };
> +	    if (my $err = $@) {
> +		PVE::Tools::run_command($cmd_del);
> +		die $err;
> +	    }
> +	    print "finished file download successfully\n";
> +
> +	    if ($checksum) {
> +		print "validating checksum...\n";
> +		print "expected $hash_alg checksum is: $checksum\n";
> +		print "checksum validation command: " . join(' ', @$cmd_check_flat) . "\n";
> +
> +		eval { PVE::Tools::run_command($cmd_check, errmsg => 'checksum mismatch'); };
> +		if (my $err = $@) {
> +		    PVE::Tools::run_command($cmd_del);
> +		    die $err;
> +		}
> +		print "validated checksum successfully\n";
> +	    }
> +	};
> +
> +	my $upid = $rpcenv->fork_worker('imgdownload', undef, $user, $worker);
> +
> +	return {
> +	    filename => $filename,
> +	    upid => $upid,
> +	    size => $size + 0,
> +	};
> +    }});
> +
> +
>   1;
> 






More information about the pve-devel mailing list