[pve-devel] [PATCH qemu-server v7 3/7] mtunnel: add API endpoints
Fabian Grünbichler
f.gruenbichler at proxmox.com
Thu Nov 17 14:33:42 CET 2022
the following two endpoints are used for migration on the remote side
POST /nodes/NODE/qemu/VMID/mtunnel
which creates and locks an empty VM config, and spawns the main qmtunnel
worker which binds to a VM-specific UNIX socket.
this worker handles JSON-encoded migration commands coming in via this
UNIX socket:
- config (set target VM config)
-- checks permissions for updating config
-- strips pending changes and snapshots
-- sets (optional) firewall config
- disk (allocate disk for NBD migration)
-- checks permission for target storage
-- returns drive string for allocated volume
- disk-import, query-disk-import, bwlimit
-- handled by PVE::StorageTunnel
- start (returning migration info)
- fstrim (via agent)
- ticket (creates a ticket for a WS connection to a specific socket)
- resume
- stop
- nbdstop
- unlock
- quit (+ cleanup)
this worker serves as a replacement for both 'qm mtunnel' and various
manual calls via SSH. the API call will return a ticket valid for
connecting to the worker's UNIX socket via a websocket connection.
GET+WebSocket upgrade /nodes/NODE/qemu/VMID/mtunnelwebsocket
gets called for connecting to a UNIX socket via websocket forwarding,
i.e. once for the main command mtunnel, and once each for the memory
migration and each NBD drive-mirror/storage migration.
access is guarded by a short-lived ticket binding the authenticated user
to the socket path. such tickets can be requested over the main mtunnel,
which keeps track of socket paths currently used by that
mtunnel/migration instance.
each command handler should check privileges for the requested action if
necessary.
both mtunnel and mtunnelwebsocket endpoints are not proxied, the
client/caller is responsible for ensuring the passed 'node' parameter
and the endpoint handling the call are matching.
Signed-off-by: Fabian Grünbichler <f.gruenbichler at proxmox.com>
---
Notes:
v7:
- fix parameter order when parsing command (thanks Stefan Hanreich!)
- pass-through $conf->{cloudinit} and tell update_vm_api to not regenerate the
cloudinit image
- bump d/control libpve-access-control dependency for Sys.Incoming privilege
v6:
- check for Sys.Incoming in mtunnel
- add definedness checks in 'config' command
- switch to vm_running_locally in 'resume' command
- moved $socket_addr closer to usage
v5:
- us vm_running_locally
- move '$socket_addr' declaration closer to usage
v4:
- add timeout to accept()
- move 'bwlimit' to PVE::StorageTunnel and extend it
- mark mtunnel(websocket) as non-proxied, and check $node accordingly
v3:
- handle meta and vmgenid better
- handle failure of 'config' updating
- move 'disk-import' and 'query-disk-import' handlers to pve-guest-common
- improve tunnel exit by letting client close the connection
- use strict VM config parser
v2: incorporated Fabian Ebner's feedback, mainly:
- use modified nbd alloc helper instead of duplicating
- fix disk cleanup, also cleanup imported disks
- fix firewall-conf vs firewall-config mismatch
soft-requires
- pve-http-server with websocket fixes (could be done via breaks? or bumped in
pve-manager, or ignored..)
PVE/API2/Qemu.pm | 535 ++++++++++++++++++++++++++++++++++++++++++++++-
debian/control | 2 +-
2 files changed, 534 insertions(+), 3 deletions(-)
diff --git a/PVE/API2/Qemu.pm b/PVE/API2/Qemu.pm
index 0bb2d147..be84ff58 100644
--- a/PVE/API2/Qemu.pm
+++ b/PVE/API2/Qemu.pm
@@ -4,10 +4,13 @@ use strict;
use warnings;
use Cwd 'abs_path';
use Net::SSLeay;
-use POSIX;
use IO::Socket::IP;
+use IO::Socket::UNIX;
+use IPC::Open3;
+use JSON;
use URI::Escape;
use Crypt::OpenSSL::Random;
+use Socket qw(SOCK_STREAM);
use PVE::Cluster qw (cfs_read_file cfs_write_file);;
use PVE::RRD;
@@ -39,6 +42,7 @@ use PVE::VZDump::Plugin;
use PVE::DataCenterConfig;
use PVE::SSHInfo;
use PVE::Replication;
+use PVE::StorageTunnel;
BEGIN {
if (!$ENV{PVE_GENERATING_DOCS}) {
@@ -1092,6 +1096,7 @@ __PACKAGE__->register_method({
{ subdir => 'spiceproxy' },
{ subdir => 'sendkey' },
{ subdir => 'firewall' },
+ { subdir => 'mtunnel' },
];
return $res;
@@ -1447,6 +1452,8 @@ my $update_vm_api = sub {
my $background_delay = extract_param($param, 'background_delay');
+ my $skip_cloud_init = extract_param($param, 'skip_cloud_init');
+
if (defined(my $cipassword = $param->{cipassword})) {
# Same logic as in cloud-init (but with the regex fixed...)
$param->{cipassword} = PVE::Tools::encrypt_pw($cipassword)
@@ -1804,7 +1811,8 @@ my $update_vm_api = sub {
if ($running) {
PVE::QemuServer::vmconfig_hotplug_pending($vmid, $conf, $storecfg, $modified, $errors);
} else {
- PVE::QemuServer::vmconfig_apply_pending($vmid, $conf, $storecfg, $errors);
+ # cloud_init must be skipped if we are in an incoming, remote live migration
+ PVE::QemuServer::vmconfig_apply_pending($vmid, $conf, $storecfg, $errors, $skip_cloud_init);
}
raise_param_exc($errors) if scalar(keys %$errors);
@@ -5099,4 +5107,527 @@ __PACKAGE__->register_method({
return PVE::QemuServer::Cloudinit::dump_cloudinit_config($conf, $param->{vmid}, $param->{type});
}});
+__PACKAGE__->register_method({
+ name => 'mtunnel',
+ path => '{vmid}/mtunnel',
+ method => 'POST',
+ protected => 1,
+ description => 'Migration tunnel endpoint - only for internal use by VM migration.',
+ permissions => {
+ check =>
+ [ 'and',
+ ['perm', '/vms/{vmid}', [ 'VM.Allocate' ]],
+ ['perm', '/', [ 'Sys.Incoming' ]],
+ ],
+ description => "You need 'VM.Allocate' permissions on '/vms/{vmid}' and Sys.Incoming" .
+ " on '/'. Further permission checks happen during the actual migration.",
+ },
+ parameters => {
+ additionalProperties => 0,
+ properties => {
+ node => get_standard_option('pve-node'),
+ vmid => get_standard_option('pve-vmid'),
+ storages => {
+ type => 'string',
+ format => 'pve-storage-id-list',
+ optional => 1,
+ description => 'List of storages to check permission and availability. Will be checked again for all actually used storages during migration.',
+ },
+ },
+ },
+ returns => {
+ additionalProperties => 0,
+ properties => {
+ upid => { type => 'string' },
+ ticket => { type => 'string' },
+ socket => { type => 'string' },
+ },
+ },
+ code => sub {
+ my ($param) = @_;
+
+ my $rpcenv = PVE::RPCEnvironment::get();
+ my $authuser = $rpcenv->get_user();
+
+ my $node = extract_param($param, 'node');
+ my $vmid = extract_param($param, 'vmid');
+
+ my $storages = extract_param($param, 'storages');
+
+ my $nodename = PVE::INotify::nodename();
+
+ raise_param_exc({ node => "node needs to be 'localhost' or local hostname '$nodename'" })
+ if $node ne 'localhost' && $node ne $nodename;
+
+ $node = $nodename;
+
+ my $storecfg = PVE::Storage::config();
+ foreach my $storeid (PVE::Tools::split_list($storages)) {
+ $check_storage_access_migrate->($rpcenv, $authuser, $storecfg, $storeid, $node);
+ }
+
+ PVE::Cluster::check_cfs_quorum();
+
+ my $lock = 'create';
+ eval { PVE::QemuConfig->create_and_lock_config($vmid, 0, $lock); };
+
+ raise_param_exc({ vmid => "unable to create empty VM config - $@"})
+ if $@;
+
+ my $realcmd = sub {
+ my $state = {
+ storecfg => PVE::Storage::config(),
+ lock => $lock,
+ vmid => $vmid,
+ };
+
+ my $run_locked = sub {
+ my ($code, $params) = @_;
+ return PVE::QemuConfig->lock_config($state->{vmid}, sub {
+ my $conf = PVE::QemuConfig->load_config($state->{vmid});
+
+ $state->{conf} = $conf;
+
+ die "Encountered wrong lock - aborting mtunnel command handling.\n"
+ if $state->{lock} && !PVE::QemuConfig->has_lock($conf, $state->{lock});
+
+ return $code->($params);
+ });
+ };
+
+ my $cmd_desc = {
+ config => {
+ conf => {
+ type => 'string',
+ description => 'Full VM config, adapted for target cluster/node',
+ },
+ 'firewall-config' => {
+ type => 'string',
+ description => 'VM firewall config',
+ optional => 1,
+ },
+ },
+ disk => {
+ format => PVE::JSONSchema::get_standard_option('pve-qm-image-format'),
+ storage => {
+ type => 'string',
+ format => 'pve-storage-id',
+ },
+ drive => {
+ type => 'object',
+ description => 'parsed drive information without volid and format',
+ },
+ },
+ start => {
+ start_params => {
+ type => 'object',
+ description => 'params passed to vm_start_nolock',
+ },
+ migrate_opts => {
+ type => 'object',
+ description => 'migrate_opts passed to vm_start_nolock',
+ },
+ },
+ ticket => {
+ path => {
+ type => 'string',
+ description => 'socket path for which the ticket should be valid. must be known to current mtunnel instance.',
+ },
+ },
+ quit => {
+ cleanup => {
+ type => 'boolean',
+ description => 'remove VM config and disks, aborting migration',
+ default => 0,
+ },
+ },
+ 'disk-import' => $PVE::StorageTunnel::cmd_schema->{'disk-import'},
+ 'query-disk-import' => $PVE::StorageTunnel::cmd_schema->{'query-disk-import'},
+ bwlimit => $PVE::StorageTunnel::cmd_schema->{bwlimit},
+ };
+
+ my $cmd_handlers = {
+ 'version' => sub {
+ # compared against other end's version
+ # bump/reset for breaking changes
+ # bump/bump for opt-in changes
+ return {
+ api => 2,
+ age => 0,
+ };
+ },
+ 'config' => sub {
+ my ($params) = @_;
+
+ # parse and write out VM FW config if given
+ if (my $fw_conf = $params->{'firewall-config'}) {
+ my ($path, $fh) = PVE::Tools::tempfile_contents($fw_conf, 700);
+
+ my $empty_conf = {
+ rules => [],
+ options => {},
+ aliases => {},
+ ipset => {} ,
+ ipset_comments => {},
+ };
+ my $cluster_fw_conf = PVE::Firewall::load_clusterfw_conf();
+
+ # TODO: add flag for strict parsing?
+ # TODO: add import sub that does all this given raw content?
+ my $vmfw_conf = PVE::Firewall::generic_fw_config_parser($path, $cluster_fw_conf, $empty_conf, 'vm');
+ $vmfw_conf->{vmid} = $state->{vmid};
+ PVE::Firewall::save_vmfw_conf($state->{vmid}, $vmfw_conf);
+
+ $state->{cleanup}->{fw} = 1;
+ }
+
+ my $conf_fn = "incoming/qemu-server/$state->{vmid}.conf";
+ my $new_conf = PVE::QemuServer::parse_vm_config($conf_fn, $params->{conf}, 1);
+ delete $new_conf->{lock};
+ delete $new_conf->{digest};
+
+ # TODO handle properly?
+ delete $new_conf->{snapshots};
+ delete $new_conf->{parent};
+ delete $new_conf->{pending};
+
+ # not handled by update_vm_api
+ my $vmgenid = delete $new_conf->{vmgenid};
+ my $meta = delete $new_conf->{meta};
+ my $cloudinit = delete $new_conf->{cloudinit}; # this is informational only
+ $new_conf->{skip_cloud_init} = 1; # re-use image from source side
+
+ $new_conf->{vmid} = $state->{vmid};
+ $new_conf->{node} = $node;
+
+ PVE::QemuConfig->remove_lock($state->{vmid}, 'create');
+
+ eval {
+ $update_vm_api->($new_conf, 1);
+ };
+ if (my $err = $@) {
+ # revert to locked previous config
+ my $conf = PVE::QemuConfig->load_config($state->{vmid});
+ $conf->{lock} = 'create';
+ PVE::QemuConfig->write_config($state->{vmid}, $conf);
+
+ die $err;
+ }
+
+ my $conf = PVE::QemuConfig->load_config($state->{vmid});
+ $conf->{lock} = 'migrate';
+ $conf->{vmgenid} = $vmgenid if defined($vmgenid);
+ $conf->{meta} = $meta if defined($meta);
+ $conf->{cloudinit} = $cloudinit if defined($cloudinit);
+ PVE::QemuConfig->write_config($state->{vmid}, $conf);
+
+ $state->{lock} = 'migrate';
+
+ return;
+ },
+ 'bwlimit' => sub {
+ my ($params) = @_;
+ return PVE::StorageTunnel::handle_bwlimit($params);
+ },
+ 'disk' => sub {
+ my ($params) = @_;
+
+ my $format = $params->{format};
+ my $storeid = $params->{storage};
+ my $drive = $params->{drive};
+
+ $check_storage_access_migrate->($rpcenv, $authuser, $state->{storecfg}, $storeid, $node);
+
+ my $storagemap = {
+ default => $storeid,
+ };
+
+ my $source_volumes = {
+ 'disk' => [
+ undef,
+ $storeid,
+ undef,
+ $drive,
+ 0,
+ $format,
+ ],
+ };
+
+ my $res = PVE::QemuServer::vm_migrate_alloc_nbd_disks($state->{storecfg}, $state->{vmid}, $source_volumes, $storagemap);
+ if (defined($res->{disk})) {
+ $state->{cleanup}->{volumes}->{$res->{disk}->{volid}} = 1;
+ return $res->{disk};
+ } else {
+ die "failed to allocate NBD disk..\n";
+ }
+ },
+ 'disk-import' => sub {
+ my ($params) = @_;
+
+ $check_storage_access_migrate->(
+ $rpcenv,
+ $authuser,
+ $state->{storecfg},
+ $params->{storage},
+ $node
+ );
+
+ $params->{unix} = "/run/qemu-server/$state->{vmid}.storage";
+
+ return PVE::StorageTunnel::handle_disk_import($state, $params);
+ },
+ 'query-disk-import' => sub {
+ my ($params) = @_;
+
+ return PVE::StorageTunnel::handle_query_disk_import($state, $params);
+ },
+ 'start' => sub {
+ my ($params) = @_;
+
+ my $info = PVE::QemuServer::vm_start_nolock(
+ $state->{storecfg},
+ $state->{vmid},
+ $state->{conf},
+ $params->{start_params},
+ $params->{migrate_opts},
+ );
+
+
+ if ($info->{migrate}->{proto} ne 'unix') {
+ PVE::QemuServer::vm_stop(undef, $state->{vmid}, 1, 1);
+ die "migration over non-UNIX sockets not possible\n";
+ }
+
+ my $socket = $info->{migrate}->{addr};
+ chown $state->{socket_uid}, -1, $socket;
+ $state->{sockets}->{$socket} = 1;
+
+ my $unix_sockets = $info->{migrate}->{unix_sockets};
+ foreach my $socket (@$unix_sockets) {
+ chown $state->{socket_uid}, -1, $socket;
+ $state->{sockets}->{$socket} = 1;
+ }
+ return $info;
+ },
+ 'fstrim' => sub {
+ if (PVE::QemuServer::qga_check_running($state->{vmid})) {
+ eval { mon_cmd($state->{vmid}, "guest-fstrim") };
+ warn "fstrim failed: $@\n" if $@;
+ }
+ return;
+ },
+ 'stop' => sub {
+ PVE::QemuServer::vm_stop(undef, $state->{vmid}, 1, 1);
+ return;
+ },
+ 'nbdstop' => sub {
+ PVE::QemuServer::nbd_stop($state->{vmid});
+ return;
+ },
+ 'resume' => sub {
+ if (PVE::QemuServer::Helpers::vm_running_locally($state->{vmid})) {
+ PVE::QemuServer::vm_resume($state->{vmid}, 1, 1);
+ } else {
+ die "VM $state->{vmid} not running\n";
+ }
+ return;
+ },
+ 'unlock' => sub {
+ PVE::QemuConfig->remove_lock($state->{vmid}, $state->{lock});
+ delete $state->{lock};
+ return;
+ },
+ 'ticket' => sub {
+ my ($params) = @_;
+
+ my $path = $params->{path};
+
+ die "Not allowed to generate ticket for unknown socket '$path'\n"
+ if !defined($state->{sockets}->{$path});
+
+ return { ticket => PVE::AccessControl::assemble_tunnel_ticket($authuser, "/socket/$path") };
+ },
+ 'quit' => sub {
+ my ($params) = @_;
+
+ if ($params->{cleanup}) {
+ if ($state->{cleanup}->{fw}) {
+ PVE::Firewall::remove_vmfw_conf($state->{vmid});
+ }
+
+ for my $volid (keys $state->{cleanup}->{volumes}->%*) {
+ print "freeing volume '$volid' as part of cleanup\n";
+ eval { PVE::Storage::vdisk_free($state->{storecfg}, $volid) };
+ warn $@ if $@;
+ }
+
+ PVE::QemuServer::destroy_vm($state->{storecfg}, $state->{vmid}, 1);
+ }
+
+ print "switching to exit-mode, waiting for client to disconnect\n";
+ $state->{exit} = 1;
+ return;
+ },
+ };
+
+ $run_locked->(sub {
+ my $socket_addr = "/run/qemu-server/$state->{vmid}.mtunnel";
+ unlink $socket_addr;
+
+ $state->{socket} = IO::Socket::UNIX->new(
+ Type => SOCK_STREAM(),
+ Local => $socket_addr,
+ Listen => 1,
+ );
+
+ $state->{socket_uid} = getpwnam('www-data')
+ or die "Failed to resolve user 'www-data' to numeric UID\n";
+ chown $state->{socket_uid}, -1, $socket_addr;
+ });
+
+ print "mtunnel started\n";
+
+ my $conn = eval { PVE::Tools::run_with_timeout(300, sub { $state->{socket}->accept() }) };
+ if ($@) {
+ warn "Failed to accept tunnel connection - $@\n";
+
+ warn "Removing tunnel socket..\n";
+ unlink $state->{socket};
+
+ warn "Removing temporary VM config..\n";
+ $run_locked->(sub {
+ PVE::QemuServer::destroy_vm($state->{storecfg}, $state->{vmid}, 1);
+ });
+
+ die "Exiting mtunnel\n";
+ }
+
+ $state->{conn} = $conn;
+
+ my $reply_err = sub {
+ my ($msg) = @_;
+
+ my $reply = JSON::encode_json({
+ success => JSON::false,
+ msg => $msg,
+ });
+ $conn->print("$reply\n");
+ $conn->flush();
+ };
+
+ my $reply_ok = sub {
+ my ($res) = @_;
+
+ $res->{success} = JSON::true;
+ my $reply = JSON::encode_json($res);
+ $conn->print("$reply\n");
+ $conn->flush();
+ };
+
+ while (my $line = <$conn>) {
+ chomp $line;
+
+ # untaint, we validate below if needed
+ ($line) = $line =~ /^(.*)$/;
+ my $parsed = eval { JSON::decode_json($line) };
+ if ($@) {
+ $reply_err->("failed to parse command - $@");
+ next;
+ }
+
+ my $cmd = delete $parsed->{cmd};
+ if (!defined($cmd)) {
+ $reply_err->("'cmd' missing");
+ } elsif ($state->{exit}) {
+ $reply_err->("tunnel is in exit-mode, processing '$cmd' cmd not possible");
+ next;
+ } elsif (my $handler = $cmd_handlers->{$cmd}) {
+ print "received command '$cmd'\n";
+ eval {
+ if ($cmd_desc->{$cmd}) {
+ PVE::JSONSchema::validate($parsed, $cmd_desc->{$cmd});
+ } else {
+ $parsed = {};
+ }
+ my $res = $run_locked->($handler, $parsed);
+ $reply_ok->($res);
+ };
+ $reply_err->("failed to handle '$cmd' command - $@")
+ if $@;
+ } else {
+ $reply_err->("unknown command '$cmd' given");
+ }
+ }
+
+ if ($state->{exit}) {
+ print "mtunnel exited\n";
+ } else {
+ die "mtunnel exited unexpectedly\n";
+ }
+ };
+
+ my $socket_addr = "/run/qemu-server/$vmid.mtunnel";
+ my $ticket = PVE::AccessControl::assemble_tunnel_ticket($authuser, "/socket/$socket_addr");
+ my $upid = $rpcenv->fork_worker('qmtunnel', $vmid, $authuser, $realcmd);
+
+ return {
+ ticket => $ticket,
+ upid => $upid,
+ socket => $socket_addr,
+ };
+ }});
+
+__PACKAGE__->register_method({
+ name => 'mtunnelwebsocket',
+ path => '{vmid}/mtunnelwebsocket',
+ method => 'GET',
+ permissions => {
+ description => "You need to pass a ticket valid for the selected socket. Tickets can be created via the mtunnel API call, which will check permissions accordingly.",
+ user => 'all', # check inside
+ },
+ description => 'Migration tunnel endpoint for websocket upgrade - only for internal use by VM migration.',
+ parameters => {
+ additionalProperties => 0,
+ properties => {
+ node => get_standard_option('pve-node'),
+ vmid => get_standard_option('pve-vmid'),
+ socket => {
+ type => "string",
+ description => "unix socket to forward to",
+ },
+ ticket => {
+ type => "string",
+ description => "ticket return by initial 'mtunnel' API call, or retrieved via 'ticket' tunnel command",
+ },
+ },
+ },
+ returns => {
+ type => "object",
+ properties => {
+ port => { type => 'string', optional => 1 },
+ socket => { type => 'string', optional => 1 },
+ },
+ },
+ code => sub {
+ my ($param) = @_;
+
+ my $rpcenv = PVE::RPCEnvironment::get();
+ my $authuser = $rpcenv->get_user();
+
+ my $nodename = PVE::INotify::nodename();
+ my $node = extract_param($param, 'node');
+
+ raise_param_exc({ node => "node needs to be 'localhost' or local hostname '$nodename'" })
+ if $node ne 'localhost' && $node ne $nodename;
+
+ my $vmid = $param->{vmid};
+ # check VM exists
+ PVE::QemuConfig->load_config($vmid);
+
+ my $socket = $param->{socket};
+ PVE::AccessControl::verify_tunnel_ticket($param->{ticket}, $authuser, "/socket/$socket");
+
+ return { socket => $socket };
+ }});
+
1;
diff --git a/debian/control b/debian/control
index af11b8fe..a2a7ce48 100644
--- a/debian/control
+++ b/debian/control
@@ -33,7 +33,7 @@ Depends: dbus,
libjson-perl,
libjson-xs-perl,
libnet-ssleay-perl,
- libpve-access-control (>= 5.0-7),
+ libpve-access-control (>= 7.2-5),
libpve-cluster-perl,
libpve-common-perl (>= 7.2-5),
libpve-guest-common-perl (>= 4.2-2),
--
2.30.2
More information about the pve-devel
mailing list