[pve-devel] [PATCH v6 qemu-server 2/6] mtunnel: add API endpoints
Stefan Hanreich
s.hanreich at proxmox.com
Fri Sep 30 13:52:33 CEST 2022
On 9/28/22 14:50, Fabian Grünbichler wrote:
> the following two endpoints are used for migration on the remote side
>
> POST /nodes/NODE/qemu/VMID/mtunnel
>
> which creates and locks an empty VM config, and spawns the main qmtunnel
> worker which binds to a VM-specific UNIX socket.
>
> this worker handles JSON-encoded migration commands coming in via this
> UNIX socket:
> - config (set target VM config)
> -- checks permissions for updating config
> -- strips pending changes and snapshots
> -- sets (optional) firewall config
> - disk (allocate disk for NBD migration)
> -- checks permission for target storage
> -- returns drive string for allocated volume
> - disk-import, query-disk-import, bwlimit
> -- handled by PVE::StorageTunnel
> - start (returning migration info)
> - fstrim (via agent)
> - ticket (creates a ticket for a WS connection to a specific socket)
> - resume
> - stop
> - nbdstop
> - unlock
> - quit (+ cleanup)
>
> this worker serves as a replacement for both 'qm mtunnel' and various
> manual calls via SSH. the API call will return a ticket valid for
> connecting to the worker's UNIX socket via a websocket connection.
>
> GET+WebSocket upgrade /nodes/NODE/qemu/VMID/mtunnelwebsocket
>
> gets called for connecting to a UNIX socket via websocket forwarding,
> i.e. once for the main command mtunnel, and once each for the memory
> migration and each NBD drive-mirror/storage migration.
>
> access is guarded by a short-lived ticket binding the authenticated user
> to the socket path. such tickets can be requested over the main mtunnel,
> which keeps track of socket paths currently used by that
> mtunnel/migration instance.
>
> each command handler should check privileges for the requested action if
> necessary.
>
> both mtunnel and mtunnelwebsocket endpoints are not proxied, the
> client/caller is responsible for ensuring the passed 'node' parameter
> and the endpoint handling the call are matching.
>
> Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
> ---
>
> Notes:
> v6:
> - check for Sys.Incoming in mtunnel
> - add definedness checks in 'config' command
> - switch to vm_running_locally in 'resume' command
> - moved $socket_addr closer to usage
> v5:
> - use vm_running_locally
> - move '$socket_addr' declaration closer to usage
> v4:
> - add timeout to accept()
> - move 'bwlimit' to PVE::StorageTunnel and extend it
> - mark mtunnel(websocket) as non-proxied, and check $node accordingly
> v3:
> - handle meta and vmgenid better
> - handle failure of 'config' updating
> - move 'disk-import' and 'query-disk-import' handlers to pve-guest-common
> - improve tunnel exit by letting client close the connection
> - use strict VM config parser
> v2: incorporated Fabian Ebner's feedback, mainly:
> - use modified nbd alloc helper instead of duplicating
> - fix disk cleanup, also cleanup imported disks
> - fix firewall-conf vs firewall-config mismatch
>
> requires
> - pve-access-control with tunnel ticket support (already marked in d/control)
> - pve-access-control with Sys.Incoming privilege (not yet applied/bumped!)
> - pve-http-server with websocket fixes (could be done via breaks? or bumped in
> pve-manager..)
>
> PVE/API2/Qemu.pm | 527 ++++++++++++++++++++++++++++++++++++++++++++++-
> debian/control | 2 +-
> 2 files changed, 527 insertions(+), 2 deletions(-)
>
> diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
> index 3ec31c26..9270ca74 100644
> --- a/PVE/API2/Qemu.pm
> +++ b/PVE/API2/Qemu.pm
> @@ -4,10 +4,13 @@ use strict;
> use warnings;
> use Cwd 'abs_path';
> use Net::SSLeay;
> -use POSIX;
> use IO::Socket::IP;
> +use IO::Socket::UNIX;
> +use IPC::Open3;
> +use JSON;
> use URI::Escape;
> use Crypt::OpenSSL::Random;
> +use Socket qw(SOCK_STREAM);
>
> use PVE::Cluster qw (cfs_read_file cfs_write_file);;
> use PVE::RRD;
> @@ -38,6 +41,7 @@ use PVE::VZDump::Plugin;
> use PVE::DataCenterConfig;
> use PVE::SSHInfo;
> use PVE::Replication;
> +use PVE::StorageTunnel;
>
> BEGIN {
> if (!$ENV{PVE_GENERATING_DOCS}) {
> @@ -1087,6 +1091,7 @@ __PACKAGE__->register_method({
> { subdir => 'spiceproxy' },
> { subdir => 'sendkey' },
> { subdir => 'firewall' },
> + { subdir => 'mtunnel' },
> ];
>
> return $res;
> @@ -4965,4 +4970,524 @@ __PACKAGE__->register_method({
> return PVE::QemuServer::Cloudinit::dump_cloudinit_config($conf, $param->{vmid}, $param->{type});
> }});
>
> +__PACKAGE__->register_method({
> + name => 'mtunnel',
> + path => '{vmid}/mtunnel',
> + method => 'POST',
> + protected => 1,
> + description => 'Migration tunnel endpoint - only for internal use by VM migration.',
> + permissions => {
> + check =>
> + [ 'and',
> + ['perm', '/vms/{vmid}', [ 'VM.Allocate' ]],
> + ['perm', '/', [ 'Sys.Incoming' ]],
> + ],
> + description => "You need 'VM.Allocate' permissions on '/vms/{vmid}' and Sys.Incoming" .
> + " on '/'. Further permission checks happen during the actual migration.",
> + },
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + node => get_standard_option('pve-node'),
> + vmid => get_standard_option('pve-vmid'),
> + storages => {
> + type => 'string',
> + format => 'pve-storage-id-list',
> + optional => 1,
> + description => 'List of storages to check permission and availability. Will be checked again for all actually used storages during migration.',
> + },
> + },
> + },
> + returns => {
> + additionalProperties => 0,
> + properties => {
> + upid => { type => 'string' },
> + ticket => { type => 'string' },
> + socket => { type => 'string' },
> + },
> + },
> + code => sub {
> + my ($param) = @_;
> +
> + my $rpcenv = PVE::RPCEnvironment::get();
> + my $authuser = $rpcenv->get_user();
> +
> + my $node = extract_param($param, 'node');
> + my $vmid = extract_param($param, 'vmid');
> +
> + my $storages = extract_param($param, 'storages');
> +
> + my $nodename = PVE::INotify::nodename();
> +
> + raise_param_exc({ node => "node needs to be 'localhost' or local hostname '$nodename'" })
> + if $node ne 'localhost' && $node ne $nodename;
> +
> + $node = $nodename;
> +
> + my $storecfg = PVE::Storage::config();
> + foreach my $storeid (PVE::Tools::split_list($storages)) {
> + $check_storage_access_migrate->($rpcenv, $authuser, $storecfg, $storeid, $node);
> + }
> +
> + PVE::Cluster::check_cfs_quorum();
> +
> + my $lock = 'create';
> + eval { PVE::QemuConfig->create_and_lock_config($vmid, 0, $lock); };
> +
> + raise_param_exc({ vmid => "unable to create empty VM config - $@"})
> + if $@;
> +
> + my $realcmd = sub {
> + my $state = {
> + storecfg => PVE::Storage::config(),
> + lock => $lock,
> + vmid => $vmid,
> + };
> +
> + my $run_locked = sub {
> + my ($code, $params) = @_;
> + return PVE::QemuConfig->lock_config($state->{vmid}, sub {
> + my $conf = PVE::QemuConfig->load_config($state->{vmid});
> +
> + $state->{conf} = $conf;
> +
> + die "Encountered wrong lock - aborting mtunnel command handling.\n"
> + if $state->{lock} && !PVE::QemuConfig->has_lock($conf, $state->{lock});
> +
> + return $code->($params);
> + });
> + };
> +
> + my $cmd_desc = {
> + config => {
> + conf => {
> + type => 'string',
> + description => 'Full VM config, adapted for target cluster/node',
> + },
> + 'firewall-config' => {
> + type => 'string',
> + description => 'VM firewall config',
> + optional => 1,
> + },
> + },
> + disk => {
> + format => PVE::JSONSchema::get_standard_option('pve-qm-image-format'),
> + storage => {
> + type => 'string',
> + format => 'pve-storage-id',
> + },
> + drive => {
> + type => 'object',
> + description => 'parsed drive information without volid and format',
> + },
> + },
> + start => {
> + start_params => {
> + type => 'object',
> + description => 'params passed to vm_start_nolock',
> + },
> + migrate_opts => {
> + type => 'object',
> + description => 'migrate_opts passed to vm_start_nolock',
> + },
> + },
> + ticket => {
> + path => {
> + type => 'string',
> + description => 'socket path for which the ticket should be valid. must be known to current mtunnel instance.',
> + },
> + },
> + quit => {
> + cleanup => {
> + type => 'boolean',
> + description => 'remove VM config and disks, aborting migration',
> + default => 0,
> + },
> + },
> + 'disk-import' => $PVE::StorageTunnel::cmd_schema->{'disk-import'},
> + 'query-disk-import' => $PVE::StorageTunnel::cmd_schema->{'query-disk-import'},
> + bwlimit => $PVE::StorageTunnel::cmd_schema->{bwlimit},
> + };
> +
> + my $cmd_handlers = {
> + 'version' => sub {
> + # compared against other end's version
> + # bump/reset for breaking changes
> + # bump/bump for opt-in changes
> + return {
> + api => 2,
> + age => 0,
> + };
> + },
> + 'config' => sub {
> + my ($params) = @_;
> +
> + # parse and write out VM FW config if given
> + if (my $fw_conf = $params->{'firewall-config'}) {
> + my ($path, $fh) = PVE::Tools::tempfile_contents($fw_conf, 700);
> +
> + my $empty_conf = {
> + rules => [],
> + options => {},
> + aliases => {},
> + ipset => {} ,
> + ipset_comments => {},
> + };
> + my $cluster_fw_conf = PVE::Firewall::load_clusterfw_conf();
> +
> + # TODO: add flag for strict parsing?
> + # TODO: add import sub that does all this given raw content?
> + my $vmfw_conf = PVE::Firewall::generic_fw_config_parser($path, $cluster_fw_conf, $empty_conf, 'vm');
> + $vmfw_conf->{vmid} = $state->{vmid};
> + PVE::Firewall::save_vmfw_conf($state->{vmid}, $vmfw_conf);
> +
> + $state->{cleanup}->{fw} = 1;
> + }
> +
> + my $conf_fn = "incoming/qemu-server/$state->{vmid}.conf";
> + my $new_conf = PVE::QemuServer::parse_vm_config($conf_fn, $params->{conf}, 1);
> + delete $new_conf->{lock};
> + delete $new_conf->{digest};
> +
> + # TODO handle properly?
> + delete $new_conf->{snapshots};
> + delete $new_conf->{parent};
> + delete $new_conf->{pending};
> +
> + # not handled by update_vm_api
> + my $vmgenid = delete $new_conf->{vmgenid};
> + my $meta = delete $new_conf->{meta};
> +
> + $new_conf->{vmid} = $state->{vmid};
> + $new_conf->{node} = $node;
> +
> + PVE::QemuConfig->remove_lock($state->{vmid}, 'create');
> +
> + eval {
> + $update_vm_api->($new_conf, 1);
> + };
> + if (my $err = $@) {
> + # revert to locked previous config
> + my $conf = PVE::QemuConfig->load_config($state->{vmid});
> + $conf->{lock} = 'create';
> + PVE::QemuConfig->write_config($state->{vmid}, $conf);
> +
> + die $err;
> + }
> +
> + my $conf = PVE::QemuConfig->load_config($state->{vmid});
> + $conf->{lock} = 'migrate';
> + $conf->{vmgenid} = $vmgenid if defined($vmgenid);
> + $conf->{meta} = $meta if defined($meta);
> + PVE::QemuConfig->write_config($state->{vmid}, $conf);
> +
> + $state->{lock} = 'migrate';
> +
> + return;
> + },
> + 'bwlimit' => sub {
> + my ($params) = @_;
> + return PVE::StorageTunnel::handle_bwlimit($params);
> + },
> + 'disk' => sub {
> + my ($params) = @_;
> +
> + my $format = $params->{format};
> + my $storeid = $params->{storage};
> + my $drive = $params->{drive};
> +
> + $check_storage_access_migrate->($rpcenv, $authuser, $state->{storecfg}, $storeid, $node);
> +
> + my $storagemap = {
> + default => $storeid,
> + };
> +
> + my $source_volumes = {
> + 'disk' => [
> + undef,
> + $storeid,
> + undef,
> + $drive,
> + 0,
> + $format,
> + ],
> + };
> +
> + my $res = PVE::QemuServer::vm_migrate_alloc_nbd_disks($state->{storecfg}, $state->{vmid}, $source_volumes, $storagemap);
> + if (defined($res->{disk})) {
> + $state->{cleanup}->{volumes}->{$res->{disk}->{volid}} = 1;
> + return $res->{disk};
> + } else {
> + die "failed to allocate NBD disk..\n";
> + }
> + },
> + 'disk-import' => sub {
> + my ($params) = @_;
> +
> + $check_storage_access_migrate->(
> + $rpcenv,
> + $authuser,
> + $state->{storecfg},
> + $params->{storage},
> + $node
> + );
> +
> + $params->{unix} = "/run/qemu-server/$state->{vmid}.storage";
> +
> + return PVE::StorageTunnel::handle_disk_import($state, $params);
> + },
> + 'query-disk-import' => sub {
> + my ($params) = @_;
> +
> + return PVE::StorageTunnel::handle_query_disk_import($state, $params);
> + },
> + 'start' => sub {
> + my ($params) = @_;
> +
> + my $info = PVE::QemuServer::vm_start_nolock(
> + $state->{storecfg},
> + $state->{vmid},
> + $state->{conf},
> + $params->{start_params},
> + $params->{migrate_opts},
> + );
> +
> +
> + if ($info->{migrate}->{proto} ne 'unix') {
> + PVE::QemuServer::vm_stop(undef, $state->{vmid}, 1, 1);
> + die "migration over non-UNIX sockets not possible\n";
> + }
> +
> + my $socket = $info->{migrate}->{addr};
> + chown $state->{socket_uid}, -1, $socket;
> + $state->{sockets}->{$socket} = 1;
> +
> + my $unix_sockets = $info->{migrate}->{unix_sockets};
> + foreach my $socket (@$unix_sockets) {
> + chown $state->{socket_uid}, -1, $socket;
> + $state->{sockets}->{$socket} = 1;
> + }
> + return $info;
> + },
> + 'fstrim' => sub {
> + if (PVE::QemuServer::qga_check_running($state->{vmid})) {
> + eval { mon_cmd($state->{vmid}, "guest-fstrim") };
> + warn "fstrim failed: $@\n" if $@;
> + }
> + return;
> + },
> + 'stop' => sub {
> + PVE::QemuServer::vm_stop(undef, $state->{vmid}, 1, 1);
> + return;
> + },
> + 'nbdstop' => sub {
> + PVE::QemuServer::nbd_stop($state->{vmid});
> + return;
> + },
> + 'resume' => sub {
> + if (PVE::QemuServer::Helpers::vm_running_locally($state->{vmid})) {
> + PVE::QemuServer::vm_resume($state->{vmid}, 1, 1);
> + } else {
> + die "VM $state->{vmid} not running\n";
> + }
> + return;
> + },
> + 'unlock' => sub {
> + PVE::QemuConfig->remove_lock($state->{vmid}, $state->{lock});
> + delete $state->{lock};
> + return;
> + },
> + 'ticket' => sub {
> + my ($params) = @_;
> +
> + my $path = $params->{path};
> +
> + die "Not allowed to generate ticket for unknown socket '$path'\n"
> + if !defined($state->{sockets}->{$path});
> +
> + return { ticket => PVE::AccessControl::assemble_tunnel_ticket($authuser, "/socket/$path") };
> + },
> + 'quit' => sub {
> + my ($params) = @_;
> +
> + if ($params->{cleanup}) {
> + if ($state->{cleanup}->{fw}) {
> + PVE::Firewall::remove_vmfw_conf($state->{vmid});
> + }
> +
> + for my $volid (keys $state->{cleanup}->{volumes}->%*) {
> + print "freeing volume '$volid' as part of cleanup\n";
> + eval { PVE::Storage::vdisk_free($state->{storecfg}, $volid) };
> + warn $@ if $@;
> + }
> +
> + PVE::QemuServer::destroy_vm($state->{storecfg}, $state->{vmid}, 1);
> + }
> +
> + print "switching to exit-mode, waiting for client to disconnect\n";
> + $state->{exit} = 1;
> + return;
> + },
> + };
> +
> + $run_locked->(sub {
> + my $socket_addr = "/run/qemu-server/$state->{vmid}.mtunnel";
> + unlink $socket_addr;
> +
> + $state->{socket} = IO::Socket::UNIX->new(
> + Type => SOCK_STREAM(),
> + Local => $socket_addr,
> + Listen => 1,
> + );
> +
> + $state->{socket_uid} = getpwnam('www-data')
> + or die "Failed to resolve user 'www-data' to numeric UID\n";
> + chown $state->{socket_uid}, -1, $socket_addr;
> + });
> +
> + print "mtunnel started\n";
> +
> + my $conn = eval { PVE::Tools::run_with_timeout(300, sub { $state->{socket}->accept() }) };
> + if ($@) {
> + warn "Failed to accept tunnel connection - $@\n";
> +
> + warn "Removing tunnel socket..\n";
> + unlink $state->{socket};
> +
> + warn "Removing temporary VM config..\n";
> + $run_locked->(sub {
> + PVE::QemuServer::destroy_vm($state->{storecfg}, $state->{vmid}, 1);
> + });
> +
> + die "Exiting mtunnel\n";
> + }
> +
> + $state->{conn} = $conn;
> +
> + my $reply_err = sub {
> + my ($msg) = @_;
> +
> + my $reply = JSON::encode_json({
> + success => JSON::false,
> + msg => $msg,
> + });
> + $conn->print("$reply\n");
> + $conn->flush();
> + };
> +
> + my $reply_ok = sub {
> + my ($res) = @_;
> +
> + $res->{success} = JSON::true;
> + my $reply = JSON::encode_json($res);
> + $conn->print("$reply\n");
> + $conn->flush();
> + };
> +
> + while (my $line = <$conn>) {
> + chomp $line;
> +
> + # untaint, we validate below if needed
> + ($line) = $line =~ /^(.*)$/;
> + my $parsed = eval { JSON::decode_json($line) };
> + if ($@) {
> + $reply_err->("failed to parse command - $@");
> + next;
> + }
> +
> + my $cmd = delete $parsed->{cmd};
> + if (!defined($cmd)) {
> + $reply_err->("'cmd' missing");
> + } elsif ($state->{exit}) {
> + $reply_err->("tunnel is in exit-mode, processing '$cmd' cmd not possible");
> + next;
> + } elsif (my $handler = $cmd_handlers->{$cmd}) {
> + print "received command '$cmd'\n";
> + eval {
> + if ($cmd_desc->{$cmd}) {
> + PVE::JSONSchema::validate($cmd_desc->{$cmd}, $parsed);
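Might the parameters be flipped here? As far as I can tell, PVE::JSONSchema::validate expects the instance first and the schema second, so this should probably be validate($parsed, $cmd_desc->{$cmd}).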
> + } else {
> + $parsed = {};
> + }
> + my $res = $run_locked->($handler, $parsed);
> + $reply_ok->($res);
> + };
> + $reply_err->("failed to handle '$cmd' command - $@")
> + if $@;
> + } else {
> + $reply_err->("unknown command '$cmd' given");
> + }
> + }
> +
> + if ($state->{exit}) {
> + print "mtunnel exited\n";
> + } else {
> + die "mtunnel exited unexpectedly\n";
> + }
> + };
> +
> + my $socket_addr = "/run/qemu-server/$vmid.mtunnel";
> + my $ticket = PVE::AccessControl::assemble_tunnel_ticket($authuser, "/socket/$socket_addr");
> + my $upid = $rpcenv->fork_worker('qmtunnel', $vmid, $authuser, $realcmd);
> +
> + return {
> + ticket => $ticket,
> + upid => $upid,
> + socket => $socket_addr,
> + };
> + }});
> +
> +__PACKAGE__->register_method({
> + name => 'mtunnelwebsocket',
> + path => '{vmid}/mtunnelwebsocket',
> + method => 'GET',
> + permissions => {
> + description => "You need to pass a ticket valid for the selected socket. Tickets can be created via the mtunnel API call, which will check permissions accordingly.",
> + user => 'all', # check inside
> + },
> + description => 'Migration tunnel endpoint for websocket upgrade - only for internal use by VM migration.',
> + parameters => {
> + additionalProperties => 0,
> + properties => {
> + node => get_standard_option('pve-node'),
> + vmid => get_standard_option('pve-vmid'),
> + socket => {
> + type => "string",
> + description => "unix socket to forward to",
> + },
> + ticket => {
> + type => "string",
> + description => "ticket return by initial 'mtunnel' API call, or retrieved via 'ticket' tunnel command",
> + },
> + },
> + },
> + returns => {
> + type => "object",
> + properties => {
> + port => { type => 'string', optional => 1 },
> + socket => { type => 'string', optional => 1 },
> + },
> + },
> + code => sub {
> + my ($param) = @_;
> +
> + my $rpcenv = PVE::RPCEnvironment::get();
> + my $authuser = $rpcenv->get_user();
> +
> + my $nodename = PVE::INotify::nodename();
> + my $node = extract_param($param, 'node');
> +
> + raise_param_exc({ node => "node needs to be 'localhost' or local hostname '$nodename'" })
> + if $node ne 'localhost' && $node ne $nodename;
> +
> + my $vmid = $param->{vmid};
> + # check VM exists
> + PVE::QemuConfig->load_config($vmid);
> +
> + my $socket = $param->{socket};
> + PVE::AccessControl::verify_tunnel_ticket($param->{ticket}, $authuser, "/socket/$socket");
> +
> + return { socket => $socket };
> + }});
> +
> 1;
> diff --git a/debian/control b/debian/control
> index a90ecd6f..ce469cbd 100644
> --- a/debian/control
> +++ b/debian/control
> @@ -33,7 +33,7 @@ Depends: dbus,
> libjson-perl,
> libjson-xs-perl,
> libnet-ssleay-perl,
> - libpve-access-control (>= 5.0-7),
> + libpve-access-control (>= 7.0-7),
> libpve-cluster-perl,
> libpve-common-perl (>= 7.1-4),
> libpve-guest-common-perl (>= 4.1-1),
More information about the pve-devel
mailing list